diff --git a/README.md b/README.md index 4970eccbacb5ecd9fb30f2dcde065d43b672d6db..3f054d92b3965ec7cecec1af391c179608e1e719 100644 --- a/README.md +++ b/README.md @@ -7,24 +7,42 @@ This repository contains miscellaneous supporting materals for HPVM. [PPoPP'18 paper](http://rsim.cs.illinois.edu/Pubs/17-PPOPP-HPVM.pdf) ## Dependencies -You would need to download and install the following components for using AVX and NVIDIA GPUs to speed up your programs +You would need to download and install the following components for using NVIDIA GPUs to speed up your programs * Intel OpenCL SDK for Linux from [software.intel.com/sdk/opencl](software.intel.com/sdk/opencl). Follow the installation instructions (no special requirements). * CUDA -## Build +## Getting source code and building HPVM -Switch to hpvm-reorg-9 +Checkout HPVM: ```shell -git checkout hpvm-reorg-9 +git clone https://gitlab.engr.illinois.edu/llvm/hpvm.git +git checkout hpvm-reorg-9 (this step may not be needed once code is mirrored on Github) ``` -Build hpvm +HPVM installer script can be used to dowwnload, configure and build HPMV along with LLVM and other subprojects including Clang. ```shell bash install.sh ``` +Specifically, the HPVM installer downloads the LLVM, Clang, compiler-rt, libcxxabi and lld, copies HPVM source into +llvm/tools and build the entire tree. LLVM C-Backend is also built as a part of HPVM and is currently used to perform +code generation in OpenCL for GPUs. -Build hpvm runtime +Alternatively, CMake can be run manually. +```shell +cd hpvm/build +cmake ../llvm [options] +``` +Some common options that can be used with CMake are: + +* -DCMAKE_INSTALL_PREFIX=directory --- Specify for directory the full pathname of where you want the HPVM tools and libraries to be installed. + +* -DCMAKE_BUILD_TYPE=type --- Valid options for type are Debug, Release, RelWithDebInfo, and MinSizeRel. Default is Debug. + +* -DLLVM_ENABLE_ASSERTIONS=On --- Compile with assertion checks enabled (default is Yes for Debug builds, No for all other build types). + +## Building hpvm runtime +HPVM also includes a runtime library which comprises of low-level, target-specific wrappers required by HPVM's code generation. ```shell cd projects/visc-rt make @@ -40,14 +58,3 @@ export LLVM_SRC_ROOT=<full path to hpvm>/llvm Benchmark suites have been migrated to the LLVM 4.0 build. They are located in [VISC](/llvm/test/VISC/parboil/benchmarks). -### Running an example (sgemm in parboil) -```shell -cd llvm/test/VISC/parboil/benchmarks/sgemm -make -make run -``` - -### Other Old Components - -Search this repository for "visc", case-insensitive. - diff --git a/hpvm/include/SupportVISC/DFGraph.h b/hpvm/include/SupportVISC/DFGraph.h index de0f5f9945038287b671642e385d5d466e466bce..ccc78a3357b3abc48d12524e05cd8b37e91f0a1d 100644 --- a/hpvm/include/SupportVISC/DFGraph.h +++ b/hpvm/include/SupportVISC/DFGraph.h @@ -403,25 +403,16 @@ public: void setHasX86FuncForTarget(visc::Target T, bool isX86Func) { switch (T) { + case visc::None: + return; // Do nothing. case visc::CPU_TARGET: GenFuncInfo.cpu_hasX86Func = isX86Func; break; case visc::GPU_TARGET: GenFuncInfo.gpu_hasX86Func = isX86Func; break; - case visc::SPIR_TARGET: - GenFuncInfo.spir_hasX86Func = isX86Func; - break; - case visc::CUDNN_TARGET: - GenFuncInfo.cudnn_hasX86Func = isX86Func; - break; - case visc::PROMISE_TARGET: - GenFuncInfo.promise_hasX86Func = isX86Func; - break; case visc::CPU_OR_GPU_TARGET: - case visc::CPU_OR_SPIR_TARGET: - assert(false && "Single target expected (CPU/GPU/SPIR)\n"); - break; + break; default: assert(false && "Unknown target\n"); break; @@ -431,25 +422,18 @@ public: bool hasX86GenFuncForTarget(visc::Target T) const { switch (T) { + case visc::None: + return false; case visc::CPU_TARGET: return GenFuncInfo.cpu_hasX86Func; case visc::GPU_TARGET: return GenFuncInfo.gpu_hasX86Func; - case visc::SPIR_TARGET: - return GenFuncInfo.spir_hasX86Func; - case visc::CUDNN_TARGET: - return GenFuncInfo.cudnn_hasX86Func; - case visc::PROMISE_TARGET: - return GenFuncInfo.promise_hasX86Func; case visc::CPU_OR_GPU_TARGET: - case visc::CPU_OR_SPIR_TARGET: assert(false && "Single target expected (CPU/GPU/SPIR/CUDNN/PROMISE)\n"); - return false; default: assert(false && "Unknown target\n"); - return false; } - + return false; } void addGenFunc(Function* F, visc::Target T, bool isX86Func) { @@ -471,39 +455,12 @@ public: GenFuncs.GPUGenFunc = F; GenFuncInfo.gpu_hasX86Func = isX86Func; break; - case visc::SPIR_TARGET: - if (GenFuncs.SPIRGenFunc != NULL) { - errs() << "Warning: Second generated SPIR function for node " - << FuncPointer->getName() << "\n"; - } - GenFuncs.SPIRGenFunc = F; - GenFuncInfo.spir_hasX86Func = isX86Func; - break; - case visc::CUDNN_TARGET: - if (GenFuncs.CUDNNGenFunc != NULL) { - errs() << "Warning: Second generated CUDNN function for node " - << FuncPointer->getName() << "\n"; - } - GenFuncs.CUDNNGenFunc = F; - GenFuncInfo.cudnn_hasX86Func = isX86Func; - break; - case visc::PROMISE_TARGET: - if (GenFuncs.PROMISEGenFunc != NULL) { - errs() << "Warning: Second generated PROMISE function for node " - << FuncPointer->getName() << "\n"; - } - GenFuncs.PROMISEGenFunc = F; - GenFuncInfo.promise_hasX86Func = isX86Func; - break; case visc::CPU_OR_GPU_TARGET: - case visc::CPU_OR_SPIR_TARGET: assert(false && "A node function should be set with a tag specifying its \ type, not the node hint itself\n"); - break; default: assert(false && "Unknown target for generated function\n"); - break; } Tag = viscUtils::getUpdatedTag(Tag,T); @@ -511,30 +468,26 @@ public: Function* getGenFuncForTarget(visc::Target T) const { switch (T) { + case visc::None: + return NULL; case visc::CPU_TARGET: return GenFuncs.CPUGenFunc; case visc::GPU_TARGET: return GenFuncs.GPUGenFunc; - case visc::SPIR_TARGET: - return GenFuncs.SPIRGenFunc; - case visc::CUDNN_TARGET: - return GenFuncs.CUDNNGenFunc; - case visc::PROMISE_TARGET: - return GenFuncs.PROMISEGenFunc; case visc::CPU_OR_GPU_TARGET: - case visc::CPU_OR_SPIR_TARGET: assert(false && "Requesting genarated node function with dual tag instead of \ CPU/GPU/SPIR/CUDNN/PROMISE\n"); - return NULL; default: assert(false && "Unknown target for generated function\n"); - return NULL; } + return NULL; } void removeGenFuncForTarget(visc::Target T) { switch (T) { + case visc::None: + return; case visc::CPU_TARGET: GenFuncs.CPUGenFunc = NULL; GenFuncInfo.cpu_hasX86Func = false; @@ -543,27 +496,12 @@ public: GenFuncs.GPUGenFunc = NULL; GenFuncInfo.gpu_hasX86Func = false; break; - case visc::SPIR_TARGET: - GenFuncs.SPIRGenFunc = NULL; - GenFuncInfo.spir_hasX86Func = false; - break; - case visc::CUDNN_TARGET: - GenFuncs.CUDNNGenFunc = NULL; - GenFuncInfo.cudnn_hasX86Func = false; - break; - case visc::PROMISE_TARGET: - GenFuncs.PROMISEGenFunc = NULL; - GenFuncInfo.promise_hasX86Func = false; - break; case visc::CPU_OR_GPU_TARGET: - case visc::CPU_OR_SPIR_TARGET: assert(false && "Removing genarated node function with dual tag instead of \ CPU/GPU/SPIR/CUDNN/PROMISE\n"); - break; default: assert(false && "Unknown target for generated function\n"); - break; } return; } diff --git a/hpvm/include/SupportVISC/VISCUtils.h b/hpvm/include/SupportVISC/VISCUtils.h index cfd93467922dc4c7039168f247deb16bc26a5248..76c80ed94d83b0dd98ed9d896914ab78e142c93b 100644 --- a/hpvm/include/SupportVISC/VISCUtils.h +++ b/hpvm/include/SupportVISC/VISCUtils.h @@ -120,12 +120,8 @@ void fixHintMetadata(Module &M, Function* F, Function* G) { }; FixHint("visc_hint_gpu"); - FixHint("visc_hint_spir"); - FixHint("visc_hint_cudnn"); - FixHint("visc_hint_promise"); FixHint("visc_hint_cpu"); FixHint("visc_hint_cpu_gpu"); - FixHint("visc_hint_cpu_spir"); } // Assuming that the changed function is a node function, it is only used as a @@ -294,72 +290,6 @@ Function* cloneFunction(Function* F, FunctionType* newFT, return newF; } -// Create new function F' as a copy of old function F with a new signature. -// The following two most used cases are handled by this function. -// 1. When some extra arguments need to be added to this function -// - Here we can map the old function arguments to -// new ones -// 2. When each pointer argument needs an additional size argument -// - Here, in the absence of VMap, we map the arguments in order, skipping -// over extra pointer arguments. -// The function returns the list of return instructions to the caller to fix in -// case the return type is also changed. -//Function* cloneFunction(Function* F, FunctionType* newFT, bool -// isAddingPtrSizeArg, SmallVectorImpl<ReturnInst*>* Returns = NULL) { -// -// DEBUG(errs() << "Cloning Function: " << F->getName() << "\n"); -// DEBUG(errs() << "Old Function Type: " << *F->getFunctionType() << "\n"); -// DEBUG(errs() << "New Function Type: " << *newFT << "\n"); -// -// assert(F->getFunctionType()->getNumParams() <= newFT->getNumParams() -// && "This function assumes that the new function has more arguments than the old function!"); -// -// // Create Function of specified type -// Function* newF = Function::Create(newFT, F->getLinkage(), F->getName()+"_cloned", F->getParent()); -// DEBUG(errs() << "Old Function name: " << F->getName() << "\n"); -// DEBUG(errs() << "New Function name: " << newF->getName() << "\n"); -// ValueToValueMapTy VMap; -// DEBUG(errs() << "No value map provided. Creating default value map\n"); -// if(isAddingPtrSizeArg) { -// DEBUG(errs() << "Case 1: Pointer arg followed by a i64 size argument in new function\n"); -// Function::arg_iterator new_ai = newF->arg_begin(); -// for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); -// ai != ae; ++ai) { -// DEBUG(errs() << ai->getArgNo() << ". " << *ai << " : " << *new_ai << "\n"); -// assert(ai->getType() == new_ai->getType() && "Arguments type do not match!"); -// VMap[&*ai] = &*new_ai; -// new_ai->takeName(&*ai); -// if(ai->getType()->isPointerTy()) { -// std::string oldName = new_ai->getName(); -// // If the current argument is pointer type, the next argument in new -// // function would be an i64 type containing the data size of this -// // argument. Hence, skip the next arguement in new function. -// ++new_ai; -// new_ai->setName("bytes_"+oldName); -// } -// ++new_ai; -// } -// } -// else { -// DEBUG(errs() << "Case 2: Extra arguments are added at the end of old function\n"); -// Function::arg_iterator new_ai = newF->arg_begin(); -// for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); -// ai != ae; ++ai, ++new_ai) { -// DEBUG(errs() << ai->getArgNo() << ". " << *ai << " : " << *new_ai << "\n"); -// assert(ai->getType() == new_ai->getType() && "Arguments type do not match!"); -// VMap[&*ai] = &*new_ai; -// new_ai->takeName(&*ai); -// } -// } -// -// // Clone function -// if (Returns == NULL) -// Returns = new SmallVector<ReturnInst*, 8>(); -// CloneFunctionInto(newF, F, VMap, false, *Returns); -// -// return newF; -//} - // Overloaded version of cloneFunction Function *cloneFunction(Function *F, Function *newF, bool isAddingPtrSizeArg, @@ -435,30 +365,12 @@ bool tagIncludesTarget(visc::Target Tag, visc::Target T) { if (T == visc::GPU_TARGET) return true; return false; - case visc::SPIR_TARGET: - if (T == visc::SPIR_TARGET) - return true; - return false; - case visc::CUDNN_TARGET: - if (T == visc::CUDNN_TARGET) - return true; - return false; - case visc::PROMISE_TARGET: - if (T == visc::PROMISE_TARGET) - return true; - return false; case visc::CPU_OR_GPU_TARGET: if ((T == visc::CPU_TARGET) || (T == visc::GPU_TARGET) || (T == visc::CPU_OR_GPU_TARGET)) return true; return false; - case visc::CPU_OR_SPIR_TARGET: - if ((T == visc::CPU_TARGET) || - (T == visc::SPIR_TARGET) || - (T == visc::CPU_OR_SPIR_TARGET)) - return true; - return false; default: assert(false && "Unknown Target\n"); } @@ -466,62 +378,32 @@ bool tagIncludesTarget(visc::Target Tag, visc::Target T) { bool isSingleTargetTag(visc::Target T) { return ((T == visc::CPU_TARGET) || - (T == visc::GPU_TARGET) || - (T == visc::SPIR_TARGET) || - (T == visc::CUDNN_TARGET) || - (T == visc::PROMISE_TARGET)); + (T == visc::GPU_TARGET)); } // Add the specified target to the given tag visc::Target getUpdatedTag(visc::Target Tag, visc::Target T) { assert(((T == visc::CPU_TARGET) || - (T == visc::GPU_TARGET) || - (T == visc::SPIR_TARGET) || - (T == visc::CUDNN_TARGET) || - (T == visc::PROMISE_TARGET)) && + (T == visc::GPU_TARGET)) && "The target is only allowed to be a single target: CPU, GPU, SPIR, CUDNN, PROMISE\n"); switch (Tag) { case visc::None: return T; case visc::CPU_TARGET: - assert((T != visc::CUDNN_TARGET) && (T != visc::PROMISE_TARGET) && - "Unsupported target combination\n"); if (T == visc::CPU_TARGET) return visc::CPU_TARGET; if (T == visc::GPU_TARGET) return visc::CPU_OR_GPU_TARGET; - if (T == visc::SPIR_TARGET) - return visc::CPU_OR_SPIR_TARGET; return T; case visc::GPU_TARGET: - assert((T != visc::SPIR_TARGET) && "Unsupported target combination\n"); - assert((T != visc::CUDNN_TARGET) && (T != visc::PROMISE_TARGET) && - "Unsupported target combination\n"); if (T == visc::CPU_TARGET) return visc::CPU_OR_GPU_TARGET; if (T == visc::GPU_TARGET) return visc::GPU_TARGET; return T; - case visc::SPIR_TARGET: - assert((T != visc::GPU_TARGET) && "Unsupported target combination\n"); - assert((T != visc::CUDNN_TARGET) && (T != visc::PROMISE_TARGET) && - "Unsupported target combination\n"); - if (T == visc::CPU_TARGET) - return visc::CPU_OR_SPIR_TARGET; - if (T == visc::SPIR_TARGET) - return visc::SPIR_TARGET; - return T; case visc::CPU_OR_GPU_TARGET: - assert((T != visc::CUDNN_TARGET) && (T != visc::PROMISE_TARGET) && - "Unsupported target combination\n"); - assert((T != visc::SPIR_TARGET) && "Unsupported target combination\n"); return visc::CPU_OR_GPU_TARGET; - case visc::CPU_OR_SPIR_TARGET: - assert((T != visc::CUDNN_TARGET) && (T != visc::PROMISE_TARGET) && - "Unsupported target combination\n"); - assert((T != visc::GPU_TARGET) && "Unsupported target combination\n"); - return visc::CPU_OR_SPIR_TARGET; default: assert(false && "Unknown Target\n"); } @@ -541,18 +423,6 @@ void addHint(Function* F, visc::Target T) { DEBUG(errs() << "GPU Target\n"); HintNode = M->getOrInsertNamedMetadata("visc_hint_gpu"); break; - case visc::SPIR_TARGET: - DEBUG(errs() << "SPIR Target\n"); - HintNode = M->getOrInsertNamedMetadata("visc_hint_spir"); - break; - case visc::CUDNN_TARGET: - DEBUG(errs() << "CUDNN Target\n"); - HintNode = M->getOrInsertNamedMetadata("visc_hint_cudnn"); - break; - case visc::PROMISE_TARGET: - DEBUG(errs() << "PROMISE Target\n"); - HintNode = M->getOrInsertNamedMetadata("visc_hint_promise"); - break; case visc::CPU_TARGET: DEBUG(errs() << "CPU Target\n"); HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu"); @@ -561,10 +431,6 @@ void addHint(Function* F, visc::Target T) { DEBUG(errs() << "CPU or GPU Target\n"); HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu_gpu"); break; - case visc::CPU_OR_SPIR_TARGET: - DEBUG(errs() << "CPU or SPIR Target\n"); - HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu_spir"); - break; default: llvm_unreachable("Unsupported Target Hint!"); break; @@ -587,21 +453,9 @@ void removeHint(Function* F, visc::Target T) { case visc::GPU_TARGET: HintNode = M->getOrInsertNamedMetadata("visc_hint_gpu"); break; - case visc::SPIR_TARGET: - HintNode = M->getOrInsertNamedMetadata("visc_hint_spir"); - break; - case visc::CUDNN_TARGET: - HintNode = M->getOrInsertNamedMetadata("visc_hint_cudnn"); - break; - case visc::PROMISE_TARGET: - HintNode = M->getOrInsertNamedMetadata("visc_hint_promise"); - break; case visc::CPU_OR_GPU_TARGET: HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu_gpu"); break; - case visc::CPU_OR_SPIR_TARGET: - HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu_spir"); - break; case visc::CPU_TARGET: HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu"); break; @@ -648,11 +502,7 @@ visc::Target getPreferredTarget(Function* F) { if(FoundPrefTarget("visc_hint_cpu")) return visc::CPU_TARGET; if(FoundPrefTarget("visc_hint_gpu")) return visc::GPU_TARGET; - if(FoundPrefTarget("visc_hint_spir")) return visc::SPIR_TARGET; - if(FoundPrefTarget("visc_hint_cudnn")) return visc::CUDNN_TARGET; - if(FoundPrefTarget("visc_hint_promise")) return visc::PROMISE_TARGET; if(FoundPrefTarget("visc_hint_cpu_gpu")) return visc::CPU_OR_GPU_TARGET; - if(FoundPrefTarget("visc_hint_cpu_spir")) return visc::CPU_OR_SPIR_TARGET; return visc::None; } diff --git a/hpvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp b/hpvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp index 06e4e79183d726bb80113264f3cc7da0a4701ecf..ca5495f06d3887c506ecf52cccb839d8fdba1555 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp +++ b/hpvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp @@ -1466,11 +1466,9 @@ void CGT_X86::codeGen(DFInternalNode* N) { // For now, use node function name and change it later Function *CF = N->getGenFuncForTarget(visc::CPU_TARGET); Function *GF = N->getGenFuncForTarget(visc::GPU_TARGET); - Function *SF = N->getGenFuncForTarget(visc::SPIR_TARGET); bool CFx86 = N->hasX86GenFuncForTarget(visc::CPU_TARGET); bool GFx86 = N->hasX86GenFuncForTarget(visc::GPU_TARGET); - bool SFx86 = N->hasX86GenFuncForTarget(visc::SPIR_TARGET); errs() << "Node: " << N->getFuncPointer()->getName() << " with tag " << N->getTag() << "\n"; @@ -1478,8 +1476,6 @@ void CGT_X86::codeGen(DFInternalNode* N) { errs() << "hasx86GenFuncForCPU : " << CFx86 << "\n"; errs() << "GPU Fun: " << (GF ? GF->getName() : "null" ) << "\n"; errs() << "hasx86GenFuncForGPU : " << GFx86 << "\n"; - errs() << "SPIR Fun: " << (SF ? SF->getName() : "null" ) << "\n"; - errs() << "hasx86GenFuncForSPIR : " << SFx86 << "\n"; if (N->getTag() == visc::None) { @@ -1503,24 +1499,12 @@ void CGT_X86::codeGen(DFInternalNode* N) { assert(N->hasX86GenFuncForTarget(visc::CPU_TARGET) && ""); assert(!(N->getGenFuncForTarget(visc::GPU_TARGET)) && ""); assert(!(N->hasX86GenFuncForTarget(visc::GPU_TARGET)) && ""); - assert(!(N->getGenFuncForTarget(visc::SPIR_TARGET)) && ""); - assert(!(N->hasX86GenFuncForTarget(visc::SPIR_TARGET)) && ""); break; case visc::GPU_TARGET: assert(!(N->getGenFuncForTarget(visc::CPU_TARGET)) && ""); assert(!(N->hasX86GenFuncForTarget(visc::CPU_TARGET)) && ""); assert(N->getGenFuncForTarget(visc::GPU_TARGET) && ""); assert(N->hasX86GenFuncForTarget(visc::GPU_TARGET) && ""); - assert(!(N->getGenFuncForTarget(visc::SPIR_TARGET)) && ""); - assert(!(N->hasX86GenFuncForTarget(visc::SPIR_TARGET)) && ""); - break; - case visc::SPIR_TARGET: - assert(!(N->getGenFuncForTarget(visc::CPU_TARGET)) && ""); - assert(!(N->hasX86GenFuncForTarget(visc::CPU_TARGET)) && ""); - assert(!(N->getGenFuncForTarget(visc::GPU_TARGET)) && ""); - assert(!(N->hasX86GenFuncForTarget(visc::GPU_TARGET)) && ""); - assert(N->getGenFuncForTarget(visc::SPIR_TARGET) && ""); - assert(N->hasX86GenFuncForTarget(visc::SPIR_TARGET) && ""); break; default: assert(false && "Unreachable: we checked that tag was single target!\n"); @@ -1536,9 +1520,6 @@ void CGT_X86::codeGen(DFInternalNode* N) { case visc::GPU_TARGET: NodeGenFunc = N->getGenFuncForTarget(visc::GPU_TARGET); break; - case visc::SPIR_TARGET: - NodeGenFunc = N->getGenFuncForTarget(visc::SPIR_TARGET); - break; default: break; } @@ -1558,7 +1539,6 @@ void CGT_X86::codeGen(DFInternalNode* N) { Function *Ftmp = N->getGenFuncForTarget(N->getTag()); N->removeGenFuncForTarget(visc::GPU_TARGET); - N->removeGenFuncForTarget(visc::SPIR_TARGET); N->setTag(visc::None); N->addGenFunc(Ftmp, visc::CPU_TARGET, true); N->setTag(visc::CPU_TARGET); @@ -1566,11 +1546,9 @@ void CGT_X86::codeGen(DFInternalNode* N) { // Sanity checks - to be removed TODO CF = N->getGenFuncForTarget(visc::CPU_TARGET); GF = N->getGenFuncForTarget(visc::GPU_TARGET); - SF = N->getGenFuncForTarget(visc::SPIR_TARGET); CFx86 = N->hasX86GenFuncForTarget(visc::CPU_TARGET); GFx86 = N->hasX86GenFuncForTarget(visc::GPU_TARGET); - SFx86 = N->hasX86GenFuncForTarget(visc::SPIR_TARGET); errs() << "After editing\n"; errs() << "Node: " << N->getFuncPointer()->getName() @@ -1579,8 +1557,6 @@ void CGT_X86::codeGen(DFInternalNode* N) { errs() << "hasx86GenFuncForCPU : " << CFx86 << "\n"; errs() << "GPU Fun: " << (GF ? GF->getName() : "null" ) << "\n"; errs() << "hasx86GenFuncForGPU : " << GFx86 << "\n"; - errs() << "SPIR Fun: " << (SF ? SF->getName() : "null" ) << "\n"; - errs() << "hasx86GenFuncForSPIR : " << SFx86 << "\n"; // assert(false && "got to the point where we have to select\n"); } else { @@ -1591,26 +1567,21 @@ void CGT_X86::codeGen(DFInternalNode* N) { Function *CF = N->getGenFuncForTarget(visc::CPU_TARGET); Function *GF = N->getGenFuncForTarget(visc::GPU_TARGET); - Function *SF = N->getGenFuncForTarget(visc::SPIR_TARGET); bool CFx86 = N->hasX86GenFuncForTarget(visc::CPU_TARGET); bool GFx86 = N->hasX86GenFuncForTarget(visc::GPU_TARGET); - bool SFx86 = N->hasX86GenFuncForTarget(visc::SPIR_TARGET); // These assertions express what we can support with the current runtime. // Code generation works the same way even for other target combinations. // For now, we want either CPU and GPU, or CPU and SPIR - assert((CF && (GF && !SF || !GF && SF)) && "Invalid target selection\n"); - assert((CFx86 && (GFx86 && !SFx86 || !GFx86 && SFx86)) && + assert((CF && (GF || !GF)) && "Invalid target selection\n"); + assert((CFx86 && (GFx86 || !GFx86)) && "Generated functions without appropriate x86 wrapper\n"); FunctionType *FT = CF->getFunctionType(); if (GF) assert(FT == GF->getFunctionType() && "Type mismatch between generated functions for GPU and CPU targets.\n"); - if (SF) - assert(FT == SF->getFunctionType() && - "Type mismatch between generated functions for SPIR and CPU targets.\n"); // Code generation of wrapper function Function *F_wrapper; @@ -1700,28 +1671,28 @@ void CGT_X86::codeGen(DFInternalNode* N) { // Switch basic block pointers BBcurrent = BBfalse; - if (SF) { + // if (SF) { // We have a GPU version. Generate policy check and call - CmpConst = - ConstantInt::get(Type::getInt32Ty(M.getContext()), 2, true); - CmpI = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, - RTFInst, CmpConst, "", BBcurrent); - BBtrue = BasicBlock::Create(M.getContext(), "version_spir", F_wrapper); - BBfalse = BasicBlock::Create(M.getContext(), "not_spir", F_wrapper); - BrI = BranchInst::Create(BBtrue, BBfalse, CmpI, BBcurrent); + // CmpConst = + // ConstantInt::get(Type::getInt32Ty(M.getContext()), 2, true); + //CmpI = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, + // RTFInst, CmpConst, "", BBcurrent); + //BBtrue = BasicBlock::Create(M.getContext(), "version_spir", F_wrapper); + // BBfalse = BasicBlock::Create(M.getContext(), "not_spir", F_wrapper); + // BrI = BranchInst::Create(BBtrue, BBfalse, CmpI, BBcurrent); - GenFuncCI = CallInst::Create(SF, GenFuncCallArgs, "", BBtrue); - RI = ReturnInst::Create(M.getContext(), GenFuncCI, BBtrue); + // GenFuncCI = CallInst::Create(SF, GenFuncCallArgs, "", BBtrue); + // RI = ReturnInst::Create(M.getContext(), GenFuncCI, BBtrue); - if (DeviceAbstraction) { + // if (DeviceAbstraction) { // Prepare arguments and function for call to wait for device runtime call - std::vector<Value *> Args; // TODO: add the device type as argument? - FunctionCallee RTF = - M.getOrInsertFunction("llvm_visc_deviceAbstraction_waitOnDeviceStatus", - runtimeModule->getFunction("llvm_visc_deviceAbstraction_waitOnDeviceStatus")->getFunctionType()); - CallInst *RTFInst = CallInst::Create(RTF, Args, "", GenFuncCI); - } - } + // std::vector<Value *> Args; // TODO: add the device type as argument? + // FunctionCallee RTF = + // M.getOrInsertFunction("llvm_visc_deviceAbstraction_waitOnDeviceStatus", + // runtimeModule->getFunction("llvm_visc_deviceAbstraction_waitOnDeviceStatus")->getFunctionType()); + // CallInst *RTFInst = CallInst::Create(RTF, Args, "", GenFuncCI); + // } + // } RI = ReturnInst::Create(M.getContext(), UndefValue::get(FT->getReturnType()), BBfalse); @@ -1730,7 +1701,6 @@ void CGT_X86::codeGen(DFInternalNode* N) { // Remove all other versions and update the tag N->addGenFunc(F_wrapper, visc::CPU_TARGET, true); N->removeGenFuncForTarget(visc::GPU_TARGET); - N->removeGenFuncForTarget(visc::SPIR_TARGET); N->setTag(visc::CPU_TARGET); // assert(false && "got to the point where we have to combine\n"); @@ -1774,11 +1744,6 @@ void CGT_X86::codeGen(DFLeafNode* N) { // by design of DFG2LLVM_NVPTX backend assert(!(N->hasX86GenFuncForTarget(visc::GPU_TARGET)) && ""); break; - case visc::SPIR_TARGET: - // A leaf node should not have an x86 function for SPIR - // by design of DFG2LLVM_SPIR backend - assert(!(N->hasX86GenFuncForTarget(visc::SPIR_TARGET)) && ""); - break; default: break; } diff --git a/hpvm/test/parboil/benchmarks/lbm/Makefile b/hpvm/test/parboil/benchmarks/lbm/Makefile index ee7bcb33b4c22f2d47bada51a9d89b0cddf270b2..4ebf6fc0af2f05cd10f6d556e0b52bee186540d8 100644 --- a/hpvm/test/parboil/benchmarks/lbm/Makefile +++ b/hpvm/test/parboil/benchmarks/lbm/Makefile @@ -1,4 +1,4 @@ -PARBOIL_ROOT = $(LLVM_SRC_ROOT)/../test/parboil +PARBOIL_ROOT = $(LLVM_SRC_ROOT)/tools/hpvm/test/parboil APP = lbm ifeq ($(NUM_CORES),) diff --git a/hpvm/test/parboil/benchmarks/pipeline/Makefile b/hpvm/test/parboil/benchmarks/pipeline/Makefile index a83cacc2cbd0b95ba6101c2dfbcb33929171dcf3..5f0fd5b73066b1ebfc9211fa628d7f326c88ca5e 100644 --- a/hpvm/test/parboil/benchmarks/pipeline/Makefile +++ b/hpvm/test/parboil/benchmarks/pipeline/Makefile @@ -31,7 +31,7 @@ BIN = $(addsuffix -$(VERSION), $(APP)) SRCDIR = src/$(VERSION) BUILDDIR = build/$(VERSION)_$(PLATFORM) -DATASET_DIR = $(PARBOIL_ROOT)/datasets/$(APP) +DATASET_DIR ?= $(PARBOIL_ROOT)/datasets/$(APP) IMAGE = $(DATASET_DIR)/$(TEST)/input/edgetest_10.png VIDEO1 = $(DATASET_DIR)/$(TEST)/input/taxi/taxi01.pgm diff --git a/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/Makefile b/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/Makefile index 6dc39540f209760564005b51d7fcbdd046eaac4b..ea6b44788d0831221b2d289d1904d35f56240615 100644 --- a/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/Makefile +++ b/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/Makefile @@ -4,9 +4,11 @@ LANGUAGE=visc SRCDIR_OBJS=io.ll #compute_gold.o VISC_OBJS=main.visc.ll APP_CUDALDFLAGS=-lm -lstdc++ -APP_CFLAGS+=-ffast-math -O3 -I/opt/opencv/include -APP_CXXFLAGS+=-ffast-math -O3 -I/opt/opencv/include -APP_LDFLAGS=`pkg-config ${OpenCV_DIR}/lib/pkgconfig/opencv.pc --libs` +APP_CFLAGS+=-ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize #-I/shared/opencv/include +APP_CXXFLAGS+=-ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize #-I/shared/opencv/include +OpenCV_DIR=/shared/opencv +APP_LDFLAGS=`pkg-config opencv --libs` +#APP_LDFLAGS=`pkg-config ${OpenCV_DIR}/lib/pkgconfig/opencv.pc --libs` #APP_LDFLAGS=-L/usr/local/cuda/lib64 -rdynamic /opt/opencv/lib/libopencv_videostab.so.3.0.0 /opt/opencv/lib/libopencv_videoio.so.3.0.0 /opt/opencv/lib/libopencv_video.so.3.0.0 /opt/opencv/lib/libopencv_superres.so.3.0.0 /opt/opencv/lib/libopencv_stitching.so.3.0.0 /opt/opencv/lib/libopencv_shape.so.3.0.0 /opt/opencv/lib/libopencv_photo.so.3.0.0 /opt/opencv/lib/libopencv_objdetect.so.3.0.0 /opt/opencv/lib/libopencv_ml.so.3.0.0 /opt/opencv/lib/libopencv_imgproc.so.3.0.0 /opt/opencv/lib/libopencv_imgcodecs.so.3.0.0 /opt/opencv/lib/libopencv_highgui.so.3.0.0 /opt/opencv/lib/libopencv_hal.a /opt/opencv/lib/libopencv_flann.so.3.0.0 /opt/opencv/lib/libopencv_features2d.so.3.0.0 /opt/opencv/lib/libopencv_core.so.3.0.0 /opt/opencv/lib/libopencv_calib3d.so.3.0.0 /opt/opencv/lib/libopencv_hal.a -ldl -lm -lpthread -lrt /opt/opencv/share/OpenCV/3rdparty/lib/libippicv.a -Wl,-rpath,/usr/local/cuda/lib64:/opt/opencv/lib #OpenCV link flags all diff --git a/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/main.cc b/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/main.cc index 12b3d42a66c369e97fbd3a08a0d179334f245153..892cdabd090412c80f3a2d26ffd3e7c183650ade 100644 --- a/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/main.cc +++ b/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/main.cc @@ -11,7 +11,7 @@ */ #include "opencv2/opencv.hpp" -#include "opencv2/core/ocl.hpp" +#include "opencv2/ocl/ocl.hpp" #include <stdio.h> #include <math.h> #include <stdlib.h> @@ -201,7 +201,7 @@ void gaussianSmoothing(float *I, size_t bytesI, Is[gloc] = smoothedVal; } - __visc__return(m, n); + __visc__return(2, m, n); } void WrapperGaussianSmoothing(float *I, size_t bytesI, @@ -210,7 +210,7 @@ void WrapperGaussianSmoothing(float *I, size_t bytesI, long m, long n) { __visc__hint(visc::CPU_TARGET); __visc__attributes(2, I, Gs, 1, Is); - void* GSNode = __visc__createNode2D(gaussianSmoothing, m, n); + void* GSNode = __visc__createNodeND(2, gaussianSmoothing, m, n); __visc__bindIn(GSNode, 0, 0, 0); // Bind I __visc__bindIn(GSNode, 1, 1, 0); // Bind bytesI __visc__bindIn(GSNode, 2, 2, 0); // Bind Gs @@ -241,7 +241,7 @@ void laplacianEstimate(float *Is, size_t bytesIs, __visc__hint(visc::DEVICE); __visc__attributes(2, Is, B, 1, L); // 3x3 image area - float imageArea[SZB][SZB]; + float imageArea[SZB*SZB]; //int gx = get_global_id(0); //int gy = get_global_id(1); @@ -255,64 +255,64 @@ void laplacianEstimate(float *Is, size_t bytesIs, if ((gx < n) && (gy < m)) { // Data copy for dilation filter - imageArea[1][1] = Is[gy * n + gx]; + imageArea[1 * SZB +1] = Is[gy * n + gx]; if (gx == 0) { - imageArea[0][0] = imageArea[1][0] = imageArea[2][0] = MIN_BR; + imageArea[0 * SZB +0] = imageArea[1 * SZB +0] = imageArea[2 * SZB +0] = MIN_BR; } else { - imageArea[1][0] = Is[gy * n + gx - 1]; - imageArea[0][0] = (gy > 0) ? Is[(gy - 1) * n + gx - 1] : MIN_BR; - imageArea[2][0] = (gy < m - 1) ? Is[(gy + 1) * n + gx - 1] : MIN_BR; + imageArea[1 * SZB +0] = Is[gy * n + gx - 1]; + imageArea[0 * SZB +0] = (gy > 0) ? Is[(gy - 1) * n + gx - 1] : MIN_BR; + imageArea[2 * SZB +0] = (gy < m - 1) ? Is[(gy + 1) * n + gx - 1] : MIN_BR; } if (gx == n - 1) { - imageArea[0][2] = imageArea[1][2] = imageArea[2][2] = MIN_BR; + imageArea[0 * SZB +2] = imageArea[1 * SZB +2] = imageArea[2 * SZB +2] = MIN_BR; } else { - imageArea[1][2] = Is[gy * n + gx + 1]; - imageArea[0][2] = (gy > 0) ? Is[(gy - 1) * n + gx + 1] : MIN_BR; - imageArea[2][2] = (gy < m - 1) ? Is[(gy + 1) * n + gx + 1] : MIN_BR; + imageArea[1 * SZB +2] = Is[gy * n + gx + 1]; + imageArea[0 * SZB +2] = (gy > 0) ? Is[(gy - 1) * n + gx + 1] : MIN_BR; + imageArea[2 * SZB +2] = (gy < m - 1) ? Is[(gy + 1) * n + gx + 1] : MIN_BR; } - imageArea[0][1] = (gy > 0) ? Is[(gy - 1) * n + gx] : MIN_BR; - imageArea[2][1] = (gy < m - 1) ? Is[(gy + 1) * n + gx] : MIN_BR; + imageArea[0 * SZB +1] = (gy > 0) ? Is[(gy - 1) * n + gx] : MIN_BR; + imageArea[2 * SZB +1] = (gy < m - 1) ? Is[(gy + 1) * n + gx] : MIN_BR; // Compute pixel of dilated image float dilatedPixel = MIN_BR; for (i = 0; i < SZB; i++) for (j = 0; j < SZB; j++) - dilatedPixel = _MAX(dilatedPixel, imageArea[i][j] * B[i*SZB + j]); + dilatedPixel = _MAX(dilatedPixel, imageArea[i * SZB +j] * B[i*SZB + j]); // Data copy for erotion filter - only change the boundary conditions if (gx == 0) { - imageArea[0][0] = imageArea[1][0] = imageArea[2][0] = MAX_BR; + imageArea[0 * SZB +0] = imageArea[1 * SZB +0] = imageArea[2 * SZB +0] = MAX_BR; } else { - if (gy == 0) imageArea[0][0] = MAX_BR; - if (gy == m-1) imageArea[2][0] = MAX_BR; + if (gy == 0) imageArea[0 * SZB +0] = MAX_BR; + if (gy == m-1) imageArea[2 * SZB +0] = MAX_BR; } if (gx == n - 1) { - imageArea[0][2] = imageArea[1][2] = imageArea[2][2] = MAX_BR; + imageArea[0 * SZB +2] = imageArea[1 * SZB +2] = imageArea[2 * SZB +2] = MAX_BR; } else { - if (gy == 0) imageArea[0][2] = MAX_BR; - if (gy == m-1) imageArea[2][2] = MAX_BR; + if (gy == 0) imageArea[0 * SZB +2] = MAX_BR; + if (gy == m-1) imageArea[2 * SZB +2] = MAX_BR; } - if (gy == 0) imageArea[0][1] = MAX_BR; - if (gy == m-1) imageArea[2][1] = MAX_BR; + if (gy == 0) imageArea[0 * SZB +1] = MAX_BR; + if (gy == m-1) imageArea[2 * SZB +1] = MAX_BR; // Compute pixel of eroded image float erodedPixel = MAX_BR; for (i = 0; i < SZB; i++) for (j = 0; j < SZB; j++) - erodedPixel = _MIN(erodedPixel, imageArea[i][j] * B[i*SZB + j]); + erodedPixel = _MIN(erodedPixel, imageArea[i * SZB +j] * B[i*SZB + j]); - float laplacian = dilatedPixel + erodedPixel - 2 * imageArea[1][1]; + float laplacian = dilatedPixel + erodedPixel - 2 * imageArea[1 * SZB +1]; L[gy*n+gx] = laplacian; } //OutStruct output = {bytesB, bytesL}; //if(gx == m-1 && gy == n-1) //std::cout << "Exit laplacian\n"; - __visc__return(m); + __visc__return(1, m); } void WrapperlaplacianEstimate(float *Is, size_t bytesIs, @@ -321,7 +321,7 @@ void WrapperlaplacianEstimate(float *Is, size_t bytesIs, long m, long n) { __visc__hint(visc::CPU_TARGET); __visc__attributes(2, Is, B, 1, L); - void* LNode = __visc__createNode2D(laplacianEstimate, m, n); + void* LNode = __visc__createNodeND(2, laplacianEstimate, m, n); __visc__bindIn(LNode, 0, 0, 0); // Bind Is __visc__bindIn(LNode, 1, 1, 0); // Bind bytesIs __visc__bindIn(LNode, 2, 2, 0); // Bind B @@ -434,7 +434,7 @@ void computeZeroCrossings(float *L, size_t bytesL, //OutStruct output = {bytesB, bytesS}; //if(gx == n-1 && gy == n-1) //std::cout << "Exit ZC\n"; - __visc__return(m); + __visc__return(1, m); } void WrapperComputeZeroCrossings(float *L, size_t bytesL, @@ -443,7 +443,7 @@ void WrapperComputeZeroCrossings(float *L, size_t bytesL, long m, long n) { __visc__hint(visc::CPU_TARGET); __visc__attributes(2, L, B, 1, S); - void* ZCNode = __visc__createNode2D(computeZeroCrossings, m, n); + void* ZCNode = __visc__createNodeND(2, computeZeroCrossings, m, n); __visc__bindIn(ZCNode, 0, 0, 0); // Bind L __visc__bindIn(ZCNode, 1, 1, 0); // Bind bytesL __visc__bindIn(ZCNode, 2, 2, 0); // Bind B @@ -518,10 +518,10 @@ void computeGradient(float *Is, size_t bytesIs, Gy += gval * Sy[(SOBEL_RADIUS + i)*SOBEL_SIZE + SOBEL_RADIUS + j]; } - G[gloc] = __visc__sqrt(Gx*Gx + Gy*Gy); + G[gloc] = sqrt(Gx*Gx + Gy*Gy); //G[gloc] = Gx*Gx + Gy*Gy; } - __visc__return(n); + __visc__return(1, n); } void WrapperComputeGradient(float *Is, size_t bytesIs, @@ -531,7 +531,7 @@ void WrapperComputeGradient(float *Is, size_t bytesIs, long m, long n) { __visc__hint(visc::CPU_TARGET); __visc__attributes(3, Is, Sx, Sy, 1, G); - void* CGNode = __visc__createNode2D(computeGradient, m, n); + void* CGNode = __visc__createNodeND(2, computeGradient, m, n); __visc__bindIn(CGNode, 0, 0, 0); // Bind Is __visc__bindIn(CGNode, 1, 1, 0); // Bind bytesIs __visc__bindIn(CGNode, 2, 2, 0); // Bind Sx @@ -557,7 +557,7 @@ void computeMaxGradientLeaf(float *G, size_t bytesG, float *maxG, size_t bytesMaxG, long m, long n) { - __visc__hint(visc::DEVICE); + __visc__hint(visc::CPU_TARGET); //__visc__hint(visc::CPU_TARGET); __visc__attributes(1, G, 1, maxG); @@ -575,15 +575,16 @@ void computeMaxGradientLeaf(float *G, size_t bytesG, } // First thread iterates over all elements of the thread block - if (lx == 0) { - for (int i = 1; (i < dimx) && (i < m*n); i++) + long bounds = dimx < m*n ? dimx : m*n; + if (lx == 0) { + for (int i = 1; i < bounds; i++) if (G[lx] < G[i]) G[lx] = G[i]; *maxG = G[lx]; } - __visc__return(n); + __visc__return(1, n); } /* @@ -626,7 +627,7 @@ void computeMaxGradientLeaf(float *G, size_t bytesG, //if (lx == 0) //__visc__atomic_max(maxG,G[gid]); - //__visc__return(m); + //__visc__return(1, m); //} void computeMaxGradientTB(float *G, size_t bytesG, @@ -636,7 +637,7 @@ void computeMaxGradientTB(float *G, size_t bytesG, //__visc__hint(visc::DEVICE); __visc__hint(visc::CPU_TARGET); __visc__attributes(2, G, maxG, 1, maxG); - void* CMGLeafNode = __visc__createNode1D(computeMaxGradientLeaf, block_x); + void* CMGLeafNode = __visc__createNodeND(1, computeMaxGradientLeaf, block_x); __visc__bindIn(CMGLeafNode, 0, 0, 0); // Bind G __visc__bindIn(CMGLeafNode, 1, 1, 0); // Bind bytesG __visc__bindIn(CMGLeafNode, 2, 2, 0); // Bind maxG @@ -653,7 +654,7 @@ void WrapperComputeMaxGradient(float *G, size_t bytesG, long block_x, long grid_x) { __visc__hint(visc::CPU_TARGET); __visc__attributes(2, G, maxG, 1, maxG); - void* CMGTBNode = __visc__createNode1D(computeMaxGradientTB, grid_x); + void* CMGTBNode = __visc__createNodeND(1, computeMaxGradientTB, grid_x); __visc__bindIn(CMGTBNode, 0, 0, 0); // Bind G __visc__bindIn(CMGTBNode, 1, 1, 0); // Bind bytesG __visc__bindIn(CMGTBNode, 2, 2, 0); // Bind maxG @@ -693,7 +694,7 @@ void rejectZeroCrossings(float *S, size_t bytesS, if ((gx < n) && (gy < m)) { E[gy*n+gx] = ((S[gy*n+gx] > 0.0) && (G[gy*n+gx] > THETA*mG)) ? 1.0 : 0.0 ; } - __visc__return(m); + __visc__return(1, m); } void WrapperRejectZeroCrossings(float *S, size_t bytesS, @@ -703,7 +704,7 @@ void WrapperRejectZeroCrossings(float *S, size_t bytesS, long m, long n) { __visc__hint(visc::CPU_TARGET); __visc__attributes(3, S, G, maxG, 1, E); - void* RZCNode = __visc__createNode2D(rejectZeroCrossings, m, n); + void* RZCNode = __visc__createNodeND(2, rejectZeroCrossings, m, n); __visc__bindIn(RZCNode, 0, 0 , 0); // Bind S __visc__bindIn(RZCNode, 1, 1 , 0); // Bind bytesS __visc__bindIn(RZCNode, 2, 2 , 0); // Bind G @@ -739,12 +740,12 @@ void edgeDetection(float *I, size_t bytesI, // 0 ) { __visc__attributes(5, I, Gs, B, Sx, Sy, 6, Is, L, S, G, maxG, E); __visc__hint(visc::CPU_TARGET); - void* GSNode = __visc__createNode(WrapperGaussianSmoothing); - void* LNode = __visc__createNode(WrapperlaplacianEstimate); - void* CZCNode = __visc__createNode(WrapperComputeZeroCrossings); - void* CGNode = __visc__createNode(WrapperComputeGradient); - void* CMGNode = __visc__createNode(WrapperComputeMaxGradient); - void* RZCNode = __visc__createNode(WrapperRejectZeroCrossings); + void* GSNode = __visc__createNodeND(0, WrapperGaussianSmoothing); + void* LNode = __visc__createNodeND(0, WrapperlaplacianEstimate); + void* CZCNode = __visc__createNodeND(0, WrapperComputeZeroCrossings); + void* CGNode = __visc__createNodeND(0, WrapperComputeGradient); + void* CMGNode = __visc__createNodeND(0, WrapperComputeMaxGradient); + void* RZCNode = __visc__createNodeND(0, WrapperRejectZeroCrossings); // Gaussian Inputs __visc__bindIn(GSNode, 0 , 0, 1); // Bind I @@ -873,7 +874,7 @@ int main (int argc, char *argv[]) { } int NUM_FRAMES = cap.get(CV_CAP_PROP_FRAME_COUNT); - //NUM_FRAMES = 5; + NUM_FRAMES = 600; std::cout << "Number of frames = " << NUM_FRAMES << "\n"; // Used to store time after each frame computation is completed diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/visc/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/visc/Makefile index f74ee8921a534b6963ba06d089398114571d070b..d1f6c96d0c279bc2f2e3e70313369d49881b62b8 100644 --- a/hpvm/test/parboil/benchmarks/sgemm/src/visc/Makefile +++ b/hpvm/test/parboil/benchmarks/sgemm/src/visc/Makefile @@ -4,5 +4,5 @@ LANGUAGE=visc SRCDIR_OBJS=io.ll #compute_gold.o VISC_OBJS=main.visc.ll APP_CUDALDFLAGS=-lm -lstdc++ -APP_CFLAGS=-ffast-math -O3 -APP_CXXFLAGS=-ffast-math -O3 +APP_CFLAGS=-ffast-math -O1 +APP_CXXFLAGS=-ffast-math -O1 diff --git a/hpvm/test/parboil/common/include/visc.h b/hpvm/test/parboil/common/include/visc.h index a27d0d003e45ecbff1efffc7c3e1226833894cd7..b0a0f141e575b104f0f3934416956cf9cd1f1904 100644 --- a/hpvm/test/parboil/common/include/visc.h +++ b/hpvm/test/parboil/common/include/visc.h @@ -14,92 +14,92 @@ #ifdef __cplusplus extern "C" { -void __visc__hint(visc::Target); +void __visc__hint(visc::Target) noexcept; #else -void __visc__hint(enum Target); +void __visc__hint(enum Target) noexcept; #endif #ifdef __cplusplus -void* __visc__node(...); -//void* __visc__createNode(...); -//void* __visc__createNode1D(...); -//void* __visc__createNode2D(...); -//void* __visc__createNode3D(...); -//void __visc__return(...); +void* __visc__node(...) noexcept; +//void* __visc__createNode(...) noexcept; +//void* __visc__createNode1D(...) noexcept; +//void* __visc__createNode2D(...) noexcept; +//void* __visc__createNode3D(...) noexcept; +//void __visc__return(...) noexcept; #endif -void* __visc__createNodeND(unsigned, ...); -void __visc__return(unsigned, ...); - -void __visc__attributes(unsigned, ...); -void __visc__init(); -void __visc__cleanup(); - -void __visc__bindIn(void*, unsigned, unsigned, unsigned); -void __visc__bindOut(void*, unsigned, unsigned, unsigned); -void* __visc__edge(void*, void*, unsigned, unsigned, unsigned, unsigned); -void __visc__push(void*, void*); -void* __visc__pop(void*); -void* __visc__launch(unsigned, ...); -void __visc__wait(void*); - -void* __visc__getNode(); -void* __visc__getParentNode(void*); -void __visc__barrier(); -void* __visc__malloc(long); -long __visc__getNodeInstanceID_x(void*); -long __visc__getNodeInstanceID_y(void*); -long __visc__getNodeInstanceID_z(void*); -long __visc__getNumNodeInstances_x(void*); -long __visc__getNumNodeInstances_y(void*); -long __visc__getNumNodeInstances_z(void*); +void* __visc__createNodeND(unsigned, ...) noexcept; +void __visc__return(unsigned, ...) noexcept; + +void __visc__attributes(unsigned, ...) noexcept; +void __visc__init() noexcept; +void __visc__cleanup() noexcept; + +void __visc__bindIn(void*, unsigned, unsigned, unsigned) noexcept; +void __visc__bindOut(void*, unsigned, unsigned, unsigned) noexcept; +void* __visc__edge(void*, void*, unsigned, unsigned, unsigned, unsigned) noexcept; +void __visc__push(void*, void*) noexcept; +void* __visc__pop(void*) noexcept; +void* __visc__launch(unsigned, ...) noexcept; +void __visc__wait(void*) noexcept; + +void* __visc__getNode() noexcept; +void* __visc__getParentNode(void*) noexcept; +void __visc__barrier() noexcept; +void* __visc__malloc(long) noexcept; +long __visc__getNodeInstanceID_x(void*) noexcept; +long __visc__getNodeInstanceID_y(void*) noexcept; +long __visc__getNodeInstanceID_z(void*) noexcept; +long __visc__getNumNodeInstances_x(void*) noexcept; +long __visc__getNumNodeInstances_y(void*) noexcept; +long __visc__getNumNodeInstances_z(void*) noexcept; // Atomic // signed int -int __visc__atomic_cmpxchg(int*, int, int); -int __visc__atomic_add(int*, int); -int __visc__atomic_sub(int*, int); -int __visc__atomic_xchg(int*, int); -int __visc__atomic_inc(int*); -int __visc__atomic_dec(int*); -int __visc__atomic_min(int*, int); -int __visc__atomic_max(int*, int); -int __visc__atomic_umax(int*, int); -int __visc__atomic_umin(int*, int); -int __visc__atomic_and(int*, int); -int __visc__atomic_or(int*, int); -int __visc__atomic_xor(int*, int); +int __visc__atomic_cmpxchg(int*, int, int) noexcept; +int __visc__atomic_add(int*, int) noexcept; +int __visc__atomic_sub(int*, int) noexcept; +int __visc__atomic_xchg(int*, int) noexcept; +int __visc__atomic_inc(int*) noexcept; +int __visc__atomic_dec(int*) noexcept; +int __visc__atomic_min(int*, int) noexcept; +int __visc__atomic_max(int*, int) noexcept; +int __visc__atomic_umax(int*, int) noexcept; +int __visc__atomic_umin(int*, int) noexcept; +int __visc__atomic_and(int*, int) noexcept; +int __visc__atomic_or(int*, int) noexcept; +int __visc__atomic_xor(int*, int) noexcept; // Special Func -float __visc__floor(float); -float __visc__rsqrt(float); -float __visc__sqrt(float); -float __visc__sin(float); -float __visc__cos(float); +float __visc__floor(float) noexcept; +float __visc__rsqrt(float) noexcept; +float __visc__sqrt(float) noexcept; +float __visc__sin(float) noexcept; +float __visc__cos(float) noexcept; // unsigned int -//unsigned __visc__atomic_cmpxchg(unsigned*, unsigned, unsigned); -//unsigned __visc__atomic_add(unsigned*, unsigned); -//unsigned __visc__atomic_sub(unsigned*, unsigned); -//unsigned __visc__atomic_xchg(unsigned*, unsigned); -//unsigned __visc__atomic_inc(unsigned*); -//unsigned __visc__atomic_dec(unsigned*); -//unsigned __visc__atomic_min(unsigned*, unsigned); -//unsigned __visc__atomic_max(unsigned*, unsigned); -//unsigned __visc__atomic_and(unsigned*, unsigned); -//unsigned __visc__atomic_or(unsigned*, unsigned); -//unsigned __visc__atomic_xor(unsigned*, unsigned); +//unsigned __visc__atomic_cmpxchg(unsigned*, unsigned, unsigned) noexcept; +//unsigned __visc__atomic_add(unsigned*, unsigned) noexcept; +//unsigned __visc__atomic_sub(unsigned*, unsigned) noexcept; +//unsigned __visc__atomic_xchg(unsigned*, unsigned) noexcept; +//unsigned __visc__atomic_inc(unsigned*) noexcept; +//unsigned __visc__atomic_dec(unsigned*) noexcept; +//unsigned __visc__atomic_min(unsigned*, unsigned) noexcept; +//unsigned __visc__atomic_max(unsigned*, unsigned) noexcept; +//unsigned __visc__atomic_and(unsigned*, unsigned) noexcept; +//unsigned __visc__atomic_or(unsigned*, unsigned) noexcept; +//unsigned __visc__atomic_xor(unsigned*, unsigned) noexcept; #include <unistd.h> -long get_global_id(int); -long get_group_id(int); -long get_local_id(int); -long get_local_size(int); +long get_global_id(int) noexcept; +long get_group_id(int) noexcept; +long get_local_id(int) noexcept; +long get_local_size(int) noexcept; -void llvm_visc_track_mem(void*, size_t); -void llvm_visc_untrack_mem(void*); -void llvm_visc_request_mem(void*, size_t); +void llvm_visc_track_mem(void*, size_t) noexcept; +void llvm_visc_untrack_mem(void*) noexcept; +void llvm_visc_request_mem(void*, size_t) noexcept; #ifdef __cplusplus } diff --git a/hpvm/test/parboil/common/mk/visc.mk b/hpvm/test/parboil/common/mk/visc.mk index 781e3601469387a7954309d7b55ecda37c6d11a5..eb11371ccdb931d5160e5143af907a308215eb54 100755 --- a/hpvm/test/parboil/common/mk/visc.mk +++ b/hpvm/test/parboil/common/mk/visc.mk @@ -2,7 +2,7 @@ # Default language wide options LANG_CFLAGS=-I$(PARBOIL_ROOT)/common/include -LANG_CXXFLAGS=$(LANG_CFLAGS) -fno-exceptions +LANG_CXXFLAGS=$(LANG_CFLAGS) LANG_LDFLAGS=-lOpenCL -L$(OPENCL_LIB_PATH) -lrt -L$(CUDA_LIB_PATH) -lcudart CFLAGS=$(LANG_CFLAGS) $(PLATFORM_CFLAGS) $(APP_CFLAGS) @@ -197,7 +197,7 @@ $(VISC_RT_LIB) : $(VISC_RT_PATH)/visc-rt.cpp make -C $(LLVM_LIB_PATH) $(HOST) $(KERNEL): $(BUILDDIR)/$(VISC_OBJS) - $(OPT) $(VISC_OPTFLAGS) -S $< -o $(HOST) + $(OPT) --debug $(VISC_OPTFLAGS) -S $< -o $(HOST) $(RUNDIR) : mkdir -p $(RUNDIR)