diff --git a/.gitignore b/.gitignore index 09b3395deae54cb0cd7145f2d75374090aa7bdb0..fdaa75feb6fbffb716f6f0f874b635e79c42ac71 100644 --- a/.gitignore +++ b/.gitignore @@ -33,5 +33,5 @@ hpvm/install/ hpvm/llvm/ hpvm/llvm-*.src.tar.xz hpvm/llvm-*.src/ -hpvm/projects/visc-rt/visc-rt.ll +hpvm/projects/hpvm-rt/hpvm-rt.ll hpvm/test/**/build/ diff --git a/hpvm/docs/hpvm-c.md b/hpvm/docs/hpvm-c.md index 25990c22304a35fe420e83f66b634c4db4489a4c..8644fc7eda7791fb0d1e03b4ae95fb7c2866decf 100644 --- a/hpvm/docs/hpvm-c.md +++ b/hpvm/docs/hpvm-c.md @@ -2,110 +2,110 @@ ## Host API -```void __visc__init()``` +```void __hpvm__init()``` Used before all other HPVM calls to initialize the HPVM runtime. -```void __visc__cleanup()``` +```void __hpvm__cleanup()``` Used at the end of HPVM program to clean up all remaining runtime-created HPVM objects. -```void __visc__cleanup()``` +```void __hpvm__cleanup()``` Used at the end of HPVM program to clean up all remaining runtime-created HPVM objects. -```void llvm_visc_track_mem(void* ptr, size_t sz)``` +```void llvm_hpvm_track_mem(void* ptr, size_t sz)``` Insert memory starting at ```ptr``` of size ```sz``` in the memory tracker of HPVM runtime. -```void llvm_visc_untrack_mem(void* ptr)``` +```void llvm_hpvm_untrack_mem(void* ptr)``` Stop tracking the memory object identified by ```ptr```. -```void llvm_visc_request_mem(void* ptr, size_t sz)``` +```void llvm_hpvm_request_mem(void* ptr, size_t sz)``` If the memory object identified by ```ptr``` is not in host memory, copy it to host memory. -```void* __visc__launch(unsigned isStream, void* rootGraph, void* args)``` +```void* __hpvm__launch(unsigned isStream, void* rootGraph, void* args)``` Launches the execution of the dataflow graph with node function ```rootGraph```. ```args``` is a pointer to a packed struct, containing one field per argument of the RootGraph function, consecutively. 
For non-streaming DFGs with a non empty result type, ```args``` must contain an additional field of the type ```RootGraph.returnTy```, where the result of the graph will be returned. ```isStream``` chooses between a non streaming (0) or streaming (1) graph execution. Returns a handle to the executing graph. -```void __visc__wait(void* G)``` +```void __hpvm__wait(void* G)``` Waits for completion of execution of the dataflow graph with handle ```G```. -```void __visc__push(void* G, void* args)``` +```void __hpvm__push(void* G, void* args)``` Push set of input data items, ```args```, (same as type included in launch) to streaming DFG with handle ```G```. -```void* __visc__pop(void* G)``` +```void* __hpvm__pop(void* G)``` Pop and return data produced from one execution of streaming DFG with handle ```G```. ## Internal Node API -```void* __visc__createNodeND(unsigned dims, void* F, ...)``` +```void* __hpvm__createNodeND(unsigned dims, void* F, ...)``` Creates a static dataflow node replicated in ```dims``` dimensions (0 to 3), each executing node function ```F```. The arguments following ```F``` are the size of each dimension, respectively, passed in as a ```size_t```. Returns a handle to the created dataflow node. -```void* __visc__edge(void* src, void* dst, unsigned replType, unsigned sp, unsigned dp, unsigned stream)``` +```void* __hpvm__edge(void* src, void* dst, unsigned replType, unsigned sp, unsigned dp, unsigned stream)``` Creates an edge from output ```sp``` of node ```src``` to input ```dp``` of node ```dst```. If ```replType``` is 0, the edge is a one-to-one edge, otherwise it is an all-to-all edge. ```isStream``` defines whether or not the edge is streaming. Returns a handle to the created edge. -```void __visc__bindIn(void* N, unsigned ip, unsigned ic, unsigned isStream)``` +```void __hpvm__bindIn(void* N, unsigned ip, unsigned ic, unsigned isStream)``` Binds the input ```ip``` of the current node to input ```ic``` of child node function ```N```. 
```isStream``` defines whether or not the input bind is streaming. -```void __visc__bindOut(void* N, unsigned op, unsigned oc, unsigned isStream)``` +```void __hpvm__bindOut(void* N, unsigned op, unsigned oc, unsigned isStream)``` Binds the output ```op``` of the current node to output ```oc``` of child node function ```N```. ```isStream``` defines whether or not the output bind is streaming. -```void __visc__hint(enum Target target)``` (C\) / ```void __visc__hint(visc::Target target)``` (C++) +```void __hpvm__hint(enum Target target)``` (C\) / ```void __hpvm__hint(hpvm::Target target)``` (C++) Must be called once in each node function. Indicates which hardware target the current function should run in -```void __visc__attributes(unsigned ni, …, unsigned no, …)``` +```void __hpvm__attributes(unsigned ni, …, unsigned no, …)``` Must be called once at the beginning of each node function. Defines the properties of the pointer arguments to the current function. ```ni``` represents the number of input arguments, and ```no``` the number of output arguments. The arguments following ```ni``` are the input arguments, and the arguments following ```no``` are the output arguments. Arguments can be marked as both input and output. All pointer arguments must be included. ## Leaf Node API -```void __visc__hint(enum Target target)``` (C\) / ```void __visc__hint(visc::Target target)``` (C++) +```void __hpvm__hint(enum Target target)``` (C\) / ```void __hpvm__hint(hpvm::Target target)``` (C++) As described in internal node API. -```void __visc__attributes(unsigned ni, …, unsigned no, …)``` +```void __hpvm__attributes(unsigned ni, …, unsigned no, …)``` As described in internal node API. -```void __visc__return(unsigned n, ...)``` -Returns ```n``` values from a leaf node function. The remaining arguments are the values to be returned. All ```__visc__return``` statements within the same function must return the same number of values. 
+```void __hpvm__return(unsigned n, ...)``` +Returns ```n``` values from a leaf node function. The remaining arguments are the values to be returned. All ```__hpvm__return``` statements within the same function must return the same number of values. -```void* __visc__getNode()``` +```void* __hpvm__getNode()``` Returns a handle to the current leaf node. -```void* __visc__getParentNode(void* N)``` +```void* __hpvm__getParentNode(void* N)``` Returns a handle to the parent node of node ```N```. -```long __visc__getNodeInstanceID_{x,y,z}(void* N)``` +```long __hpvm__getNodeInstanceID_{x,y,z}(void* N)``` Returns the dynamic ID of the current instance of node ```N``` in the x, y, or z dimension respectively. The dimension must be one of the dimensions in which the node is replicated. -```long __visc__getNumNodeInstances_{x,y,z}(void* N)``` +```long __hpvm__getNumNodeInstances_{x,y,z}(void* N)``` Returns the number of dynamic instances of node ```N``` in the x, y, or z dimension respectively. The dimension must be one of the dimensions in which the node is replicated. -```void __visc__barrier()``` +```void __hpvm__barrier()``` Local synchronization barrier across dynamic instances of current leaf node. -```void* __visc__malloc(long nBytes)``` +```void* __hpvm__malloc(long nBytes)``` Allocate a block of memory of size ```nBytes``` and returns a pointer to it. The allocated object can be shared by all nodes, although the pointer returned must somehow be communicated explicitly for use by other nodes. -```int __visc__atomic_add(int* m, int v)``` +```int __hpvm__atomic_add(int* m, int v)``` Atomically adds ```v``` to the value stored at memory location ```[m]```. Returns the value previously stored at ```[m]```. -```int __visc__atomic_sub(int* m, int v)``` +```int __hpvm__atomic_sub(int* m, int v)``` Atomically subtracts ```v``` from the value stored at memory location ```[m]```. Returns the value previously stored at ```[m]```. 
-```int __visc__atomic_xchg(int* m, int v)``` +```int __hpvm__atomic_xchg(int* m, int v)``` Atomically swaps ```v``` with the value stored at memory location ```[m]```. Returns the value previously stored at ```[m]```. -```int __visc__atomic_inc(int* m)``` +```int __hpvm__atomic_inc(int* m)``` Atomically increments the value stored at memory location ```[m]```. Returns the value previously stored at ```[m]```. -```int __visc__atomic_dec(int* m)``` +```int __hpvm__atomic_dec(int* m)``` Atomically decrements the value stored at memory location ```[m]```. Returns the value previously stored at ```[m]```. -```int __visc__atomic_min(int* m, int v)``` +```int __hpvm__atomic_min(int* m, int v)``` Atomically computes the min of ```v``` and the value stored at memory location ```[m]```. Returns the value previously stored at ```[m]```. -```int __visc__atomic_max(int* m, int v)``` +```int __hpvm__atomic_max(int* m, int v)``` Atomically computes the max of ```v``` and the value stored at memory location ```[m]```. Returns the value previously stored at ```[m]```. -```int __visc__atomic_and(int* m, int v)``` +```int __hpvm__atomic_and(int* m, int v)``` Atomically computes the bitwise AND of ```v``` and the value stored at memory location ```[m]```. Returns the value previously stored at ```[m]```. -```int __visc__atomic_or(int* m, int v)``` +```int __hpvm__atomic_or(int* m, int v)``` Atomically computes the bitwise OR of ```v``` and the value stored at memory location ```[m]```. Returns the value previously stored at ```[m]```. -```int __visc__atomic_xor(int* m, int v)``` +```int __hpvm__atomic_xor(int* m, int v)``` Atomically computes the bitwise XOR of ```v``` and the value stored at memory location ```[m]```. Returns the value previously stored at ```[m]```. 
diff --git a/hpvm/docs/hpvm-specification.md b/hpvm/docs/hpvm-specification.md index bc19c95f9c03af261b915665cae7b1b996e5bb34..c3dece54945d147daf7885050d6a8f1db4eb014b 100644 --- a/hpvm/docs/hpvm-specification.md +++ b/hpvm/docs/hpvm-specification.md @@ -101,7 +101,7 @@ Return a handle to the current dataflow node. ```i8* llvm.hpvm.getParentNode(i8* N)``` Return a handle to the parent in the hierarchy of node ```N```. -```i32 llvm.visc.getNumDims(i8* N)``` +```i32 llvm.hpvm.getNumDims(i8* N)``` Get the number of dimensions of node ```N```. ```i64 llvm.hpvm.getNodeInstanceID.{x,y,z}(i8* N)``` diff --git a/hpvm/include/BuildDFG/BuildDFG.h b/hpvm/include/BuildDFG/BuildDFG.h index 28230e135beb68c07c998e607fa3d03d40a66791..ca4c616da5f4076528b1294992ec8ad3ab768809 100644 --- a/hpvm/include/BuildDFG/BuildDFG.h +++ b/hpvm/include/BuildDFG/BuildDFG.h @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -#include "SupportVISC/DFGraph.h" +#include "SupportHPVM/DFGraph.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -58,10 +58,10 @@ public: // Functions virtual bool runOnModule(Module &M); - static bool isViscLaunchIntrinsic(Instruction *I); - static bool isViscGraphIntrinsic(Instruction *I); - static bool isViscQueryIntrinsic(Instruction *I); - static bool isViscIntrinsic(Instruction *I); + static bool isHPVMLaunchIntrinsic(Instruction *I); + static bool isHPVMGraphIntrinsic(Instruction *I); + static bool isHPVMQueryIntrinsic(Instruction *I); + static bool isHPVMIntrinsic(Instruction *I); static bool isTypeCongruent(Type *L, Type *R); // TODO: Maybe make these fields const diff --git a/hpvm/include/GenVISC/GenVISC.h b/hpvm/include/GenHPVM/GenHPVM.h similarity index 67% rename from hpvm/include/GenVISC/GenVISC.h rename to hpvm/include/GenHPVM/GenHPVM.h index 1db9929be70fdc4335e23d7e879248f0ebb45c07..24798bc2740e2299f67cc7f515437339f2fe8310 100644 --- 
a/hpvm/include/GenVISC/GenVISC.h +++ b/hpvm/include/GenHPVM/GenHPVM.h @@ -1,4 +1,4 @@ -//== GenVISC.h - Header file for "LLVM IR to VISC IR Pass" =// +//== GenHPVM.h - Header file for "LLVM IR to HPVM IR Pass" =// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "SupportVISC/VISCTimer.h" +#include "SupportHPVM/HPVMTimer.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -18,24 +18,24 @@ using namespace llvm; -namespace genvisc { -// GenVISC - The first implementation. -struct GenVISC : public ModulePass { +namespace genhpvm { +// GenHPVM - The first implementation. +struct GenHPVM : public ModulePass { static char ID; // Pass identification, replacement for typeid - GenVISC() : ModulePass(ID) {} + GenHPVM() : ModulePass(ID) {} private: // Member variables Module *M; - FunctionCallee llvm_visc_initializeTimerSet; - FunctionCallee llvm_visc_switchToTimer; - FunctionCallee llvm_visc_printTimerSet; + FunctionCallee llvm_hpvm_initializeTimerSet; + FunctionCallee llvm_hpvm_switchToTimer; + FunctionCallee llvm_hpvm_printTimerSet; GlobalVariable *TimerSet; // Functions void initializeTimerSet(Instruction *); - void switchToTimer(enum visc_TimerID, Instruction *); + void switchToTimer(enum hpvm_TimerID, Instruction *); void printTimerSet(Instruction *); Value *getStringPointer(const Twine &S, Instruction *InsertBefore, const Twine &Name = ""); @@ -45,4 +45,4 @@ public: virtual bool runOnModule(Module &M); }; -} // namespace genvisc +} // namespace genhpvm diff --git a/hpvm/include/SupportVISC/DFG2LLVM.h b/hpvm/include/SupportHPVM/DFG2LLVM.h similarity index 82% rename from hpvm/include/SupportVISC/DFG2LLVM.h rename to hpvm/include/SupportHPVM/DFG2LLVM.h index b9e4cc4158b71ab18fbeadf2e4d094055feb6149..07147c6d909f5352dd886b5f8bc1a2b0ae434ffe 100644 --- a/hpvm/include/SupportVISC/DFG2LLVM.h +++ 
b/hpvm/include/SupportHPVM/DFG2LLVM.h @@ -1,7 +1,7 @@ #ifndef __DFG2LLVM_H__ #define __DFG2LLVM_H__ -//===---- DFG2LLVM.h - Header file for "VISC Dataflow Graph to Target" ----===// +//===---- DFG2LLVM.h - Header file for "HPVM Dataflow Graph to Target" ----===// // // The LLVM Compiler Infrastructure // @@ -11,9 +11,9 @@ //===----------------------------------------------------------------------===// #include "BuildDFG/BuildDFG.h" -#include "SupportVISC/VISCHint.h" -#include "SupportVISC/VISCTimer.h" -#include "SupportVISC/VISCUtils.h" +#include "SupportHPVM/HPVMHint.h" +#include "SupportHPVM/HPVMTimer.h" +#include "SupportHPVM/HPVMUtils.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -26,7 +26,7 @@ using namespace builddfg; #define TIMER(X) \ do { \ - if (VISCTimer) { \ + if (HPVMTimer) { \ X; \ } \ } while (0) @@ -37,8 +37,8 @@ using namespace builddfg; namespace dfg2llvm { // Helper Functions -static inline ConstantInt *getTimerID(Module &, enum visc_TimerID); -static inline ConstantInt *getTimerID(Module &, enum visc::Target); +static inline ConstantInt *getTimerID(Module &, enum hpvm_TimerID); +static inline ConstantInt *getTimerID(Module &, enum hpvm::Target); bool hasAttribute(Function *, unsigned, Attribute::AttrKind); @@ -69,7 +69,7 @@ protected: // Member variables Module &M; BuildDFG &DFG; - bool VISCTimer = false; + bool HPVMTimer = false; std::string TargetName = "None"; // Map from Old function associated with DFNode to new cloned function with @@ -78,12 +78,12 @@ protected: // "Have we visited this function before?") DenseMap<DFNode *, Value *> OutputMap; - // VISC Runtime API + // HPVM Runtime API std::unique_ptr<Module> runtimeModule; - FunctionCallee llvm_visc_initializeTimerSet; - FunctionCallee llvm_visc_switchToTimer; - FunctionCallee llvm_visc_printTimerSet; + FunctionCallee llvm_hpvm_initializeTimerSet; + FunctionCallee llvm_hpvm_switchToTimer; + FunctionCallee llvm_hpvm_printTimerSet; 
GlobalVariable *TimerSet; GlobalVariable *GraphIDAddr; Instruction *InitCall; @@ -109,7 +109,7 @@ protected: // Virtual Functions virtual void initializeTimerSet(Instruction *); - virtual void switchToTimer(enum visc_TimerID, Instruction *); + virtual void switchToTimer(enum hpvm_TimerID, Instruction *); virtual void printTimerSet(Instruction *); virtual ~CodeGenTraversal() {} @@ -118,9 +118,9 @@ public: // Constructor CodeGenTraversal(Module &_M, BuildDFG &_DFG) : M(_M), DFG(_DFG) {} - static bool checkPreferredTarget(DFNode *N, visc::Target T); - static bool preferredTargetIncludes(DFNode *N, visc::Target T); - visc::Target getPreferredTarget(DFNode *N); + static bool checkPreferredTarget(DFNode *N, hpvm::Target T); + static bool preferredTargetIncludes(DFNode *N, hpvm::Target T); + hpvm::Target getPreferredTarget(DFNode *N); virtual void visit(DFInternalNode *N) { // If code has already been generated for this internal node, skip the @@ -157,25 +157,25 @@ public: // -------------- CodeGenTraversal Implementation ----------------- -bool CodeGenTraversal::checkPreferredTarget(DFNode *N, visc::Target T) { +bool CodeGenTraversal::checkPreferredTarget(DFNode *N, hpvm::Target T) { Function *F = N->getFuncPointer(); Module *M = F->getParent(); NamedMDNode *HintNode; switch (T) { - case visc::GPU_TARGET: - HintNode = M->getOrInsertNamedMetadata("visc_hint_gpu"); + case hpvm::GPU_TARGET: + HintNode = M->getOrInsertNamedMetadata("hpvm_hint_gpu"); break; - case visc::SPIR_TARGET: - HintNode = M->getOrInsertNamedMetadata("visc_hint_spir"); + case hpvm::SPIR_TARGET: + HintNode = M->getOrInsertNamedMetadata("hpvm_hint_spir"); break; - case visc::CUDNN_TARGET: - HintNode = M->getOrInsertNamedMetadata("visc_hint_cudnn"); + case hpvm::CUDNN_TARGET: + HintNode = M->getOrInsertNamedMetadata("hpvm_hint_cudnn"); break; - case visc::PROMISE_TARGET: - HintNode = M->getOrInsertNamedMetadata("visc_hint_promise"); + case hpvm::PROMISE_TARGET: + HintNode = 
M->getOrInsertNamedMetadata("hpvm_hint_promise"); break; - case visc::CPU_TARGET: - HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu"); + case hpvm::CPU_TARGET: + HintNode = M->getOrInsertNamedMetadata("hpvm_hint_cpu"); break; default: llvm_unreachable("Target Not supported yet!"); @@ -190,37 +190,37 @@ bool CodeGenTraversal::checkPreferredTarget(DFNode *N, visc::Target T) { return false; } -visc::Target CodeGenTraversal::getPreferredTarget(DFNode *N) { - return viscUtils::getPreferredTarget(N->getFuncPointer()); +hpvm::Target CodeGenTraversal::getPreferredTarget(DFNode *N) { + return hpvmUtils::getPreferredTarget(N->getFuncPointer()); } -bool CodeGenTraversal::preferredTargetIncludes(DFNode *N, visc::Target T) { +bool CodeGenTraversal::preferredTargetIncludes(DFNode *N, hpvm::Target T) { Function *F = N->getFuncPointer(); Module *M = F->getParent(); std::vector<NamedMDNode *> HintNode; switch (T) { - case visc::GPU_TARGET: - HintNode.push_back(M->getOrInsertNamedMetadata("visc_hint_gpu")); - HintNode.push_back(M->getOrInsertNamedMetadata("visc_hint_cpu_gpu")); + case hpvm::GPU_TARGET: + HintNode.push_back(M->getOrInsertNamedMetadata("hpvm_hint_gpu")); + HintNode.push_back(M->getOrInsertNamedMetadata("hpvm_hint_cpu_gpu")); break; - case visc::SPIR_TARGET: - HintNode.push_back(M->getOrInsertNamedMetadata("visc_hint_spir")); - HintNode.push_back(M->getOrInsertNamedMetadata("visc_hint_cpu_spir")); + case hpvm::SPIR_TARGET: + HintNode.push_back(M->getOrInsertNamedMetadata("hpvm_hint_spir")); + HintNode.push_back(M->getOrInsertNamedMetadata("hpvm_hint_cpu_spir")); break; - case visc::CPU_TARGET: - HintNode.push_back(M->getOrInsertNamedMetadata("visc_hint_cpu")); - HintNode.push_back(M->getOrInsertNamedMetadata("visc_hint_cpu_gpu")); - HintNode.push_back(M->getOrInsertNamedMetadata("visc_hint_cpu_spir")); + case hpvm::CPU_TARGET: + HintNode.push_back(M->getOrInsertNamedMetadata("hpvm_hint_cpu")); + 
HintNode.push_back(M->getOrInsertNamedMetadata("hpvm_hint_cpu_gpu")); + HintNode.push_back(M->getOrInsertNamedMetadata("hpvm_hint_cpu_spir")); break; - case visc::CUDNN_TARGET: - HintNode.push_back(M->getOrInsertNamedMetadata("visc_hint_cudnn")); + case hpvm::CUDNN_TARGET: + HintNode.push_back(M->getOrInsertNamedMetadata("hpvm_hint_cudnn")); break; - case visc::PROMISE_TARGET: - HintNode.push_back(M->getOrInsertNamedMetadata("visc_hint_promise")); + case hpvm::PROMISE_TARGET: + HintNode.push_back(M->getOrInsertNamedMetadata("hpvm_hint_promise")); break; - case visc::CPU_OR_GPU_TARGET: - case visc::CPU_OR_SPIR_TARGET: + case hpvm::CPU_OR_GPU_TARGET: + case hpvm::CPU_OR_SPIR_TARGET: assert(false && "Target should be one of CPU/GPU/SPIR\n"); break; default: @@ -308,11 +308,11 @@ Function *CodeGenTraversal::addArgument(Function *F, Type *Ty, Function *newF = Function::Create(FTy, F->getLinkage(), F->getName() + "_cloned", F->getParent()); renameNewArgument(newF, name); - newF = viscUtils::cloneFunction(F, newF, false); + newF = hpvmUtils::cloneFunction(F, newF, false); // Check if the function is used by a metadata node if (F->isUsedByMetadata()) { - viscUtils::fixHintMetadata(*F->getParent(), F, newF); + hpvmUtils::fixHintMetadata(*F->getParent(), F, newF); } return newF; @@ -396,32 +396,32 @@ Argument *CodeGenTraversal::getArgumentAt(Function *F, unsigned offset) { } void CodeGenTraversal::initTimerAPI() { - DECLARE(llvm_visc_initializeTimerSet); - DECLARE(llvm_visc_switchToTimer); - DECLARE(llvm_visc_printTimerSet); + DECLARE(llvm_hpvm_initializeTimerSet); + DECLARE(llvm_hpvm_switchToTimer); + DECLARE(llvm_hpvm_printTimerSet); } // Timer Routines // Initialize the timer set void CodeGenTraversal::initializeTimerSet(Instruction *InsertBefore) { - // DEBUG(errs() << "Inserting call to: " << *llvm_visc_initializeTimerSet << + // DEBUG(errs() << "Inserting call to: " << *llvm_hpvm_initializeTimerSet << // "\n"); TIMER(TimerSet = new GlobalVariable( M, 
Type::getInt8PtrTy(M.getContext()), false, GlobalValue::CommonLinkage, Constant::getNullValue(Type::getInt8PtrTy(M.getContext())), - Twine("viscTimerSet_") + TargetName); + Twine("hpvmTimerSet_") + TargetName); DEBUG(errs() << "New global variable: " << *TimerSet << "\n"); - Value *TimerSetAddr = CallInst::Create(llvm_visc_initializeTimerSet, + Value *TimerSetAddr = CallInst::Create(llvm_hpvm_initializeTimerSet, None, "", InsertBefore); new StoreInst(TimerSetAddr, TimerSet, InsertBefore);); } -void CodeGenTraversal::switchToTimer(enum visc_TimerID timer, +void CodeGenTraversal::switchToTimer(enum hpvm_TimerID timer, Instruction *InsertBefore) { Value *switchArgs[] = {TimerSet, getTimerID(M, timer)}; - TIMER(CallInst::Create(llvm_visc_switchToTimer, + TIMER(CallInst::Create(llvm_hpvm_switchToTimer, ArrayRef<Value *>(switchArgs, 2), "", InsertBefore)); } @@ -430,16 +430,16 @@ void CodeGenTraversal::printTimerSet(Instruction *InsertBefore) { TIMER(TimerName = getStringPointer(TargetName + Twine("_Timer"), InsertBefore)); Value *printArgs[] = {TimerSet, TimerName}; - TIMER(CallInst::Create(llvm_visc_printTimerSet, + TIMER(CallInst::Create(llvm_hpvm_printTimerSet, ArrayRef<Value *>(printArgs, 2), "", InsertBefore)); } // Implementation of Helper Functions -static inline ConstantInt *getTimerID(Module &M, enum visc_TimerID timer) { +static inline ConstantInt *getTimerID(Module &M, enum hpvm_TimerID timer) { return ConstantInt::get(Type::getInt32Ty(M.getContext()), timer); } -static inline ConstantInt *getTargetID(Module &M, enum visc::Target T) { +static inline ConstantInt *getTargetID(Module &M, enum hpvm::Target T) { return ConstantInt::get(Type::getInt32Ty(M.getContext()), T); } diff --git a/hpvm/include/SupportVISC/DFGTreeTraversal.h b/hpvm/include/SupportHPVM/DFGTreeTraversal.h similarity index 100% rename from hpvm/include/SupportVISC/DFGTreeTraversal.h rename to hpvm/include/SupportHPVM/DFGTreeTraversal.h diff --git a/hpvm/include/SupportVISC/DFGraph.h 
b/hpvm/include/SupportHPVM/DFGraph.h similarity index 94% rename from hpvm/include/SupportVISC/DFGraph.h rename to hpvm/include/SupportHPVM/DFGraph.h index 0c224a344c4ec342f52f4816280e101518ba43dd..d904e2401d7e9a58a38e9bca024de1a437cd56d1 100644 --- a/hpvm/include/SupportVISC/DFGraph.h +++ b/hpvm/include/SupportHPVM/DFGraph.h @@ -20,8 +20,8 @@ #ifndef LLVM_IR_DFGRAPH_H #define LLVM_IR_DFGRAPH_H -#include "SupportVISC/VISCHint.h" -#include "SupportVISC/VISCUtils.h" +#include "SupportHPVM/HPVMHint.h" +#include "SupportHPVM/HPVMUtils.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" @@ -158,7 +158,7 @@ public: } }; -// DFNode represents a single VISC Dataflow Node in LLVM. +// DFNode represents a single HPVM Dataflow Node in LLVM. // // A Dataflow Node basically consists of // 1. Pointer to a function describing this dataflow node @@ -210,8 +210,8 @@ private: ///< hierarchy unsigned Rank; ///< Ordering based on toplogical sort const DFNodeKind Kind; ///< Kind of Node Internal/Leaf - visc::Target Tag; ///< Code Generated for which backend - visc::Target Hint; ///< To store preferred backend + hpvm::Target Tag; ///< Code Generated for which backend + hpvm::Target Hint; ///< To store preferred backend public: virtual ~DFNode() { @@ -287,13 +287,13 @@ public: DFNodeKind getKind() const { return Kind; } - DFNode(IntrinsicInst *_II, Function *_FuncPointer, visc::Target _Hint, + DFNode(IntrinsicInst *_II, Function *_FuncPointer, hpvm::Target _Hint, DFInternalNode *_Parent, unsigned _NumOfDim, std::vector<Value *> _DimLimits, DFNodeKind _K); bool isRoot() const { // It is a root node is it was created from a launch intrinsic - if (II->getCalledFunction()->getName().equals("llvm.visc.launch")) { + if (II->getCalledFunction()->getName().equals("llvm.hpvm.launch")) { assert(Level == 0 && "Root node's level is zero."); return true; } @@ -326,9 +326,9 @@ public: unsigned getRank() const { return Rank; } - void setTag(visc::Target 
T) { Tag = T; } + void setTag(hpvm::Target T) { Tag = T; } - visc::Target getTag() const { return Tag; } + hpvm::Target getTag() const { return Tag; } void *getProperty(PropertyKind PType) { assert(PropertyList.count(PType) == 1 && @@ -342,24 +342,24 @@ public: PropertyList[PType] = PValue; } - void setGenFunc(Function *F, visc::Target T) { + void setGenFunc(Function *F, hpvm::Target T) { GenFunc = F; Tag = T; } Function *getGenFunc() const { return GenFunc; } - void setHasX86FuncForTarget(visc::Target T, bool isX86Func) { + void setHasX86FuncForTarget(hpvm::Target T, bool isX86Func) { switch (T) { - case visc::None: + case hpvm::None: return; // Do nothing. - case visc::CPU_TARGET: + case hpvm::CPU_TARGET: GenFuncInfo.cpu_hasX86Func = isX86Func; break; - case visc::GPU_TARGET: + case hpvm::GPU_TARGET: GenFuncInfo.gpu_hasX86Func = isX86Func; break; - case visc::CPU_OR_GPU_TARGET: + case hpvm::CPU_OR_GPU_TARGET: break; default: assert(false && "Unknown target\n"); @@ -368,15 +368,15 @@ public: return; } - bool hasX86GenFuncForTarget(visc::Target T) const { + bool hasX86GenFuncForTarget(hpvm::Target T) const { switch (T) { - case visc::None: + case hpvm::None: return false; - case visc::CPU_TARGET: + case hpvm::CPU_TARGET: return GenFuncInfo.cpu_hasX86Func; - case visc::GPU_TARGET: + case hpvm::GPU_TARGET: return GenFuncInfo.gpu_hasX86Func; - case visc::CPU_OR_GPU_TARGET: + case hpvm::CPU_OR_GPU_TARGET: assert(false && "Single target expected (CPU/GPU/SPIR/CUDNN/PROMISE)\n"); default: assert(false && "Unknown target\n"); @@ -384,10 +384,10 @@ public: return false; } - void addGenFunc(Function *F, visc::Target T, bool isX86Func) { + void addGenFunc(Function *F, hpvm::Target T, bool isX86Func) { switch (T) { - case visc::CPU_TARGET: + case hpvm::CPU_TARGET: if (GenFuncs.CPUGenFunc != NULL) { DEBUG(errs() << "Warning: Second generated CPU function for node " << FuncPointer->getName() << "\n"); @@ -395,7 +395,7 @@ public: GenFuncs.CPUGenFunc = F; 
GenFuncInfo.cpu_hasX86Func = isX86Func; break; - case visc::GPU_TARGET: + case hpvm::GPU_TARGET: if (GenFuncs.GPUGenFunc != NULL) { DEBUG(errs() << "Warning: Second generated GPU function for node " << FuncPointer->getName() << "\n"); @@ -403,25 +403,25 @@ public: GenFuncs.GPUGenFunc = F; GenFuncInfo.gpu_hasX86Func = isX86Func; break; - case visc::CPU_OR_GPU_TARGET: + case hpvm::CPU_OR_GPU_TARGET: assert(false && "A node function should be set with a tag specifying its \ type, not the node hint itself\n"); default: assert(false && "Unknown target for generated function\n"); } - Tag = viscUtils::getUpdatedTag(Tag, T); + Tag = hpvmUtils::getUpdatedTag(Tag, T); } - Function *getGenFuncForTarget(visc::Target T) const { + Function *getGenFuncForTarget(hpvm::Target T) const { switch (T) { - case visc::None: + case hpvm::None: return NULL; - case visc::CPU_TARGET: + case hpvm::CPU_TARGET: return GenFuncs.CPUGenFunc; - case visc::GPU_TARGET: + case hpvm::GPU_TARGET: return GenFuncs.GPUGenFunc; - case visc::CPU_OR_GPU_TARGET: + case hpvm::CPU_OR_GPU_TARGET: assert(false && "Requesting genarated node function with dual tag instead of \ CPU/GPU/SPIR/CUDNN/PROMISE\n"); @@ -431,19 +431,19 @@ public: return NULL; } - void removeGenFuncForTarget(visc::Target T) { + void removeGenFuncForTarget(hpvm::Target T) { switch (T) { - case visc::None: + case hpvm::None: return; - case visc::CPU_TARGET: + case hpvm::CPU_TARGET: GenFuncs.CPUGenFunc = NULL; GenFuncInfo.cpu_hasX86Func = false; break; - case visc::GPU_TARGET: + case hpvm::GPU_TARGET: GenFuncs.GPUGenFunc = NULL; GenFuncInfo.gpu_hasX86Func = false; break; - case visc::CPU_OR_GPU_TARGET: + case hpvm::CPU_OR_GPU_TARGET: assert(false && "Removing genarated node function with dual tag instead of \ CPU/GPU/SPIR/CUDNN/PROMISE\n"); @@ -453,9 +453,9 @@ public: return; } - void setTargetHint(visc::Target T) { Hint = T; } + void setTargetHint(hpvm::Target T) { Hint = T; } - visc::Target getTargetHint() const { return Hint; } + hpvm::Target 
getTargetHint() const { return Hint; } bool isDummyNode() const { return isEntryNode() || isExitNode(); } @@ -496,7 +496,7 @@ private: DFGraph *childGraph; ///< Pointer to dataflow graph // Constructor - DFInternalNode(IntrinsicInst *II, Function *FuncPointer, visc::Target Hint, + DFInternalNode(IntrinsicInst *II, Function *FuncPointer, hpvm::Target Hint, DFInternalNode *Parent, int NumOfDim, std::vector<Value *> DimLimits) : DFNode(II, FuncPointer, Hint, Parent, NumOfDim, DimLimits, @@ -508,7 +508,7 @@ private: public: static DFInternalNode * Create(IntrinsicInst *II, Function *FuncPointer, - visc::Target Hint = visc::CPU_TARGET, DFInternalNode *Parent = NULL, + hpvm::Target Hint = hpvm::CPU_TARGET, DFInternalNode *Parent = NULL, int NumOfDim = 0, std::vector<Value *> DimLimits = std::vector<Value *>()) { @@ -539,14 +539,14 @@ class DFLeafNode : public DFNode { private: // Constructor - DFLeafNode(IntrinsicInst *II, Function *FuncPointer, visc::Target Hint, + DFLeafNode(IntrinsicInst *II, Function *FuncPointer, hpvm::Target Hint, DFInternalNode *Parent, int NumOfDim = 0, std::vector<Value *> DimLimits = std::vector<Value *>()) : DFNode(II, FuncPointer, Hint, Parent, NumOfDim, DimLimits, LeafNode) {} public: static DFLeafNode * - Create(IntrinsicInst *II, Function *FuncPointer, visc::Target Hint, + Create(IntrinsicInst *II, Function *FuncPointer, hpvm::Target Hint, DFInternalNode *Parent, int NumOfDim = 0, std::vector<Value *> DimLimits = std::vector<Value *>()) { return new DFLeafNode(II, FuncPointer, Hint, Parent, NumOfDim, DimLimits); @@ -558,7 +558,7 @@ public: // void applyDFEdgeVisitor(DFEdgeVisitor &V); /*virtual*/ }; -// DFEdge represents a single VISC Dataflow Edge in LLVM. +// DFEdge represents a single HPVM Dataflow Edge in LLVM. // // A Dataflow Edge basically consists of // 1. 
Pointer to the dataflow node that is the source of this edge @@ -634,8 +634,8 @@ DFGraph::DFGraph(DFInternalNode *P) { Parent = P; // Create dummy entry and exit nodes and add them to the graph Entry = - DFLeafNode::Create(NULL, Parent->getFuncPointer(), visc::None, Parent); - Exit = DFLeafNode::Create(NULL, Parent->getFuncPointer(), visc::None, Parent); + DFLeafNode::Create(NULL, Parent->getFuncPointer(), hpvm::None, Parent); + Exit = DFLeafNode::Create(NULL, Parent->getFuncPointer(), hpvm::None, Parent); addChildDFNode(Entry); addChildDFNode(Exit); } @@ -655,7 +655,7 @@ bool DFGraph::isStreaming() { } //===--------------------- DFNode Outlined Functions --------------===// -DFNode::DFNode(IntrinsicInst *_II, Function *_FuncPointer, visc::Target _Hint, +DFNode::DFNode(IntrinsicInst *_II, Function *_FuncPointer, hpvm::Target _Hint, DFInternalNode *_Parent, unsigned _NumOfDim, std::vector<Value *> _DimLimits, DFNodeKind _K) : II(_II), FuncPointer(_FuncPointer), Parent(_Parent), NumOfDim(_NumOfDim), @@ -663,7 +663,7 @@ DFNode::DFNode(IntrinsicInst *_II, Function *_FuncPointer, visc::Target _Hint, Type *Ty = FuncPointer->getFunctionType()->getReturnType(); - // Allow the return type to be void too, in the hVISC IR. If return type is + // Allow the return type to be void too, in the hHPVM IR. If return type is // void, create an empty struct type and keep that as the return type of the // node. if (Ty->isVoidTy()) @@ -683,7 +683,7 @@ DFNode::DFNode(IntrinsicInst *_II, Function *_FuncPointer, visc::Target _Hint, Level = (_Parent) ? 
_Parent->getLevel() + 1 : 0; Rank = 0; - Tag = visc::None; + Tag = hpvm::None; GenFuncs.CPUGenFunc = NULL; GenFuncs.GPUGenFunc = NULL; GenFuncs.SPIRGenFunc = NULL; diff --git a/hpvm/include/SupportVISC/VISCHint.h b/hpvm/include/SupportHPVM/HPVMHint.h similarity index 78% rename from hpvm/include/SupportVISC/VISCHint.h rename to hpvm/include/SupportHPVM/HPVMHint.h index 99266b071843ab0417ea73c6e4533dfa381d52cd..1ef4c6eb3b986328080caa9e99e96f444978c03e 100644 --- a/hpvm/include/SupportVISC/VISCHint.h +++ b/hpvm/include/SupportHPVM/HPVMHint.h @@ -1,4 +1,4 @@ -//===------------ VISCTimer.h - Header file for "VISC Timer API" ----------===// +//===------------- HPVMHint.h - Header file for "HPVM Hint API" -----------===// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// -#ifndef VISC_HINT_HEADER -#define VISC_HINT_HEADER +#ifndef HPVM_HINT_HEADER +#define HPVM_HINT_HEADER /************************** Hint Routines ***************************/ #ifdef __cplusplus -namespace visc { +namespace hpvm { #endif enum Target { @@ -32,4 +32,4 @@ enum Target { } #endif -#endif // VISC_HINT_HEADER +#endif // HPVM_HINT_HEADER diff --git a/hpvm/include/SupportHPVM/HPVMTimer.h b/hpvm/include/SupportHPVM/HPVMTimer.h new file mode 100644 index 0000000000000000000000000000000000000000..05b24d41d6d50c61cd38b458676dbf79d28a917f --- /dev/null +++ b/hpvm/include/SupportHPVM/HPVMTimer.h @@ -0,0 +1,151 @@ +//===------------ HPVMTimer.h - Header file for "HPVM Timer API" ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef HPVM_TIMER_HEADER +#define HPVM_TIMER_HEADER + +/************************** Timer Routines ***************************/ +extern "C" { + +/* A time or duration. 
*/ +//#if _POSIX_VERSION >= 200112L +typedef unsigned long long hpvm_Timestamp; /* time in microseconds */ +//#else +//# error "Timestamps not implemented" +//#endif + +enum hpvm_TimerState { + hpvm_Timer_STOPPED, + hpvm_Timer_RUNNING, +}; + +struct hpvm_Timer { + enum hpvm_TimerState state; + hpvm_Timestamp elapsed; /* Amount of time elapsed so far */ + hpvm_Timestamp init; /* Beginning of the current time interval, + * if state is RUNNING. End of the last + * recorded time interval otherwise. */ +}; + +/* Reset a timer. + * Use this to initialize a timer or to clear + * its elapsed time. The reset timer is stopped. + */ +void hpvm_ResetTimer(struct hpvm_Timer *timer); + +/* Start a timer. The timer is set to RUNNING mode and + * time elapsed while the timer is running is added to + * the timer. + * The timer should not already be running. + */ +void hpvm_StartTimer(struct hpvm_Timer *timer); + +/* Stop a timer. + * This stops adding elapsed time to the timer. + * The timer should not already be stopped. + */ +void hpvm_StopTimer(struct hpvm_Timer *timer); + +/* Get the elapsed time in seconds. */ +double hpvm_GetElapsedTime(struct hpvm_Timer *timer); + +/* Execution time is assigned to one of these categories. 
*/ +enum hpvm_TimerID { + hpvm_TimerID_NONE = 0, + hpvm_TimerID_IO, /* Time spent in input/output */ + hpvm_TimerID_KERNEL, /* Time spent computing on the device, + * recorded asynchronously */ + hpvm_TimerID_COPY, /* Time spent synchronously moving data + * to/from device and allocating/freeing + * memory on the device */ + hpvm_TimerID_DRIVER, /* Time spent in the host interacting with the + * driver, primarily for recording the time + * spent queueing asynchronous operations */ + hpvm_TimerID_COPY_ASYNC, /* Time spent in asynchronous transfers */ + hpvm_TimerID_COMPUTE, /* Time for all program execution other + * than parsing command line arguments, + * I/O, kernel, and copy */ + hpvm_TimerID_OVERLAP, /* Time double-counted in asynchronous and + * host activity: automatically filled in, + * not intended for direct usage */ + // GPU FUNCTION + hpvm_TimerID_INIT_CTX, + hpvm_TimerID_CLEAR_CTX, + hpvm_TimerID_COPY_SCALAR, + hpvm_TimerID_COPY_PTR, + hpvm_TimerID_MEM_FREE, + hpvm_TimerID_READ_OUTPUT, + hpvm_TimerID_SETUP, + hpvm_TimerID_MEM_TRACK, + hpvm_TimerID_MEM_UNTRACK, + hpvm_TimerID_MISC, + // LAUNCH FUNCTION + hpvm_TimerID_PTHREAD_CREATE, + hpvm_TimerID_ARG_PACK, + hpvm_TimerID_ARG_UNPACK, + hpvm_TimerID_COMPUTATION, + hpvm_TimerID_OUTPUT_PACK, + hpvm_TimerID_OUTPUT_UNPACK, + + hpvm_TimerID_LAST /* Number of timer IDs */ +}; + +/* Dynamic list of asynchronously tracked times between events */ +struct hpvm_async_time_marker_list { + char *label; // actually just a pointer to a string + enum hpvm_TimerID timerID; /* The ID to which the interval beginning + * with this marker should be attributed */ + void *marker; + // cudaEvent_t marker; /* The driver event for this marker */ + struct hpvm_async_time_marker_list *next; +}; + +struct hpvm_SubTimer { + char *label; + struct hpvm_Timer timer; + struct hpvm_SubTimer *next; +}; + +struct hpvm_SubTimerList { + struct hpvm_SubTimer *current; + struct hpvm_SubTimer *subtimer_list; +}; + +/* A set of timers for 
recording execution times. */ +struct hpvm_TimerSet { + enum hpvm_TimerID current; + struct hpvm_async_time_marker_list *async_markers; + hpvm_Timestamp async_begin; + hpvm_Timestamp wall_begin; + struct hpvm_Timer timers[hpvm_TimerID_LAST]; + struct hpvm_SubTimerList *sub_timer_list[hpvm_TimerID_LAST]; +}; + +/* Reset all timers in the set. */ +void hpvm_InitializeTimerSet(struct hpvm_TimerSet *timers); + +void hpvm_AddSubTimer(struct hpvm_TimerSet *timers, char *label, + enum hpvm_TimerID hpvm_Category); + +/* Select which timer the next interval of time should be accounted + * to. The selected timer is started and other timers are stopped. + * Using hpvm_TimerID_NONE stops all timers. */ +inline void hpvm_SwitchToTimer(struct hpvm_TimerSet *timers, + enum hpvm_TimerID timer); + +void hpvm_SwitchToSubTimer(struct hpvm_TimerSet *timers, char *label, + enum hpvm_TimerID category); + +/* Print timer values to standard output. */ +void hpvm_PrintTimerSet(struct hpvm_TimerSet *timers); + +/* Release timer resources */ +void hpvm_DestroyTimerSet(struct hpvm_TimerSet *timers); +} +#endif // HPVM_TIMER_HEADER diff --git a/hpvm/include/SupportVISC/VISCUtils.h b/hpvm/include/SupportHPVM/HPVMUtils.h similarity index 84% rename from hpvm/include/SupportVISC/VISCUtils.h rename to hpvm/include/SupportHPVM/HPVMUtils.h index 0efd20b5b5eb57943de1feb6d2afa886c6c48a5c..25b9880180f2cb4590f5b5fcbb3f3f2fbe025f8f 100644 --- a/hpvm/include/SupportVISC/VISCUtils.h +++ b/hpvm/include/SupportHPVM/HPVMUtils.h @@ -1,5 +1,5 @@ // -//===---- DFG2LLVM.h - Header file for "VISC Dataflow Graph to Target" ----===// +//===---- DFG2LLVM.h - Header file for "HPVM Dataflow Graph to Target" ----===// // // The LLVM Compiler Infrastructure // @@ -8,12 +8,12 @@ // //===----------------------------------------------------------------------===// -#ifndef VISC_UTILS_HEADER -#define VISC_UTILS_HEADER +#ifndef HPVM_UTILS_HEADER +#define HPVM_UTILS_HEADER #include <assert.h> -#include "SupportVISC/VISCHint.h" 
+#include "SupportHPVM/HPVMHint.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" @@ -29,31 +29,31 @@ using namespace llvm; -namespace viscUtils { +namespace hpvmUtils { // Helper Functions -static bool isViscCreateNodeIntrinsic(Instruction *I) { +static bool isHPVMCreateNodeIntrinsic(Instruction *I) { if (!isa<IntrinsicInst>(I)) return false; IntrinsicInst *II = cast<IntrinsicInst>(I); return (II->getCalledFunction()->getName()) - .startswith("llvm.visc.createNode"); + .startswith("llvm.hpvm.createNode"); } -static bool isViscCreateNodeCall(Instruction *I) { +static bool isHPVMCreateNodeCall(Instruction *I) { if (!isa<CallInst>(I)) return false; CallInst *CI = cast<CallInst>(I); return (CI->getCalledValue()->stripPointerCasts()->getName()) - .startswith("__visc__createNode"); + .startswith("__hpvm__createNode"); } -static bool isViscLaunchCall(Instruction *I) { +static bool isHPVMLaunchCall(Instruction *I) { if (!isa<CallInst>(I)) return false; CallInst *CI = cast<CallInst>(I); return (CI->getCalledValue()->stripPointerCasts()->getName()) - .startswith("__visc__launch"); + .startswith("__hpvm__launch"); } // Creates a new createNode intrinsic, similar to II but with different // associated function F instead @@ -69,22 +69,22 @@ createIdenticalCreateNodeIntrinsicWithDifferentFunction(Function *F, ArrayRef<Value *> CreateNodeArgs; switch (II->getIntrinsicID()) { - case Intrinsic::visc_createNode: { + case Intrinsic::hpvm_createNode: { CreateNodeArgs = ArrayRef<Value *>(Fp); break; } - case Intrinsic::visc_createNode1D: { + case Intrinsic::hpvm_createNode1D: { Value *CreateNode1DArgs[] = {Fp, II->getArgOperand(1)}; CreateNodeArgs = ArrayRef<Value *>(CreateNode1DArgs, 2); break; } - case Intrinsic::visc_createNode2D: { + case Intrinsic::hpvm_createNode2D: { Value *CreateNode2DArgs[] = {Fp, II->getArgOperand(1), II->getArgOperand(2)}; CreateNodeArgs = ArrayRef<Value *>(CreateNode2DArgs, 3); break; } - case 
Intrinsic::visc_createNode3D: { + case Intrinsic::hpvm_createNode3D: { Value *CreateNode3DArgs[] = {Fp, II->getArgOperand(1), II->getArgOperand(2), II->getArgOperand(3)}; CreateNodeArgs = ArrayRef<Value *>(CreateNode3DArgs, 4); @@ -101,7 +101,7 @@ createIdenticalCreateNodeIntrinsicWithDifferentFunction(Function *F, return CreateNodeII; } -// Fix VISC hints for this function +// Fix HPVM hints for this function void fixHintMetadata(Module &M, Function *F, Function *G) { Metadata *MD_F = ValueAsMetadata::getIfExists(F); MDTuple *MDT_F = @@ -119,9 +119,9 @@ void fixHintMetadata(Module &M, Function *F, Function *G) { } }; - FixHint("visc_hint_gpu"); - FixHint("visc_hint_cpu"); - FixHint("visc_hint_cpu_gpu"); + FixHint("hpvm_hint_gpu"); + FixHint("hpvm_hint_cpu"); + FixHint("hpvm_hint_cpu_gpu"); } // Assuming that the changed function is a node function, it is only used as a @@ -138,7 +138,7 @@ void replaceNodeFunctionInIR(Module &M, Function *F, Function *G) { ++i) { Instruction *I = &*i; // Grab pointer to Instruction - if (isViscCreateNodeIntrinsic(I)) { + if (isHPVMCreateNodeIntrinsic(I)) { IntrinsicInst *II = cast<IntrinsicInst>(I); // The found createNode is not associated with the changed function if (II->getArgOperand(0) != F) @@ -150,7 +150,7 @@ void replaceNodeFunctionInIR(Module &M, Function *F, Function *G) { createIdenticalCreateNodeIntrinsicWithDifferentFunction(G, II); II->replaceAllUsesWith(CreateNodeII); toBeErased.push_back(II); - } else if (isViscCreateNodeCall(I)) { + } else if (isHPVMCreateNodeCall(I)) { CallInst *CI = cast<CallInst>(I); // The found createNode is not associated with the changed function if (CI->getArgOperand(1) != F) @@ -161,7 +161,7 @@ void replaceNodeFunctionInIR(Module &M, Function *F, Function *G) { // Replace use of F with use of G CI->setArgOperand(1, G); DEBUG(errs() << "Fixed use: " << *CI << "\n"); - } else if (isViscLaunchCall(I)) { + } else if (isHPVMLaunchCall(I)) { CallInst *CI = cast<CallInst>(I); // The found launch 
call is not associated with the changed function if (CI->getArgOperand(1)->stripPointerCasts() != F) @@ -370,21 +370,21 @@ Function *cloneFunction(Function *F, Function *newF, bool isAddingPtrSizeArg, //------------------- Helper Functions For Handling Hints -------------------// // Return true if 1st arg (tag) contains 2nd (target) -bool tagIncludesTarget(visc::Target Tag, visc::Target T) { +bool tagIncludesTarget(hpvm::Target Tag, hpvm::Target T) { switch (Tag) { - case visc::None: + case hpvm::None: return false; - case visc::CPU_TARGET: - if (T == visc::CPU_TARGET) + case hpvm::CPU_TARGET: + if (T == hpvm::CPU_TARGET) return true; return false; - case visc::GPU_TARGET: - if (T == visc::GPU_TARGET) + case hpvm::GPU_TARGET: + if (T == hpvm::GPU_TARGET) return true; return false; - case visc::CPU_OR_GPU_TARGET: - if ((T == visc::CPU_TARGET) || (T == visc::GPU_TARGET) || - (T == visc::CPU_OR_GPU_TARGET)) + case hpvm::CPU_OR_GPU_TARGET: + if ((T == hpvm::CPU_TARGET) || (T == hpvm::GPU_TARGET) || + (T == hpvm::CPU_OR_GPU_TARGET)) return true; return false; default: @@ -392,41 +392,41 @@ bool tagIncludesTarget(visc::Target Tag, visc::Target T) { } } -bool isSingleTargetTag(visc::Target T) { - return ((T == visc::CPU_TARGET) || (T == visc::GPU_TARGET)); +bool isSingleTargetTag(hpvm::Target T) { + return ((T == hpvm::CPU_TARGET) || (T == hpvm::GPU_TARGET)); } // Add the specified target to the given tag -visc::Target getUpdatedTag(visc::Target Tag, visc::Target T) { - assert(((T == visc::CPU_TARGET) || (T == visc::GPU_TARGET)) && +hpvm::Target getUpdatedTag(hpvm::Target Tag, hpvm::Target T) { + assert(((T == hpvm::CPU_TARGET) || (T == hpvm::GPU_TARGET)) && "The target is only allowed to be a single target: CPU, GPU, SPIR, " "CUDNN, PROMISE\n"); switch (Tag) { - case visc::None: + case hpvm::None: return T; - case visc::CPU_TARGET: - if (T == visc::CPU_TARGET) - return visc::CPU_TARGET; - if (T == visc::GPU_TARGET) - return visc::CPU_OR_GPU_TARGET; + case 
hpvm::CPU_TARGET: + if (T == hpvm::CPU_TARGET) + return hpvm::CPU_TARGET; + if (T == hpvm::GPU_TARGET) + return hpvm::CPU_OR_GPU_TARGET; return T; - case visc::GPU_TARGET: - if (T == visc::CPU_TARGET) - return visc::CPU_OR_GPU_TARGET; - if (T == visc::GPU_TARGET) - return visc::GPU_TARGET; + case hpvm::GPU_TARGET: + if (T == hpvm::CPU_TARGET) + return hpvm::CPU_OR_GPU_TARGET; + if (T == hpvm::GPU_TARGET) + return hpvm::GPU_TARGET; return T; - case visc::CPU_OR_GPU_TARGET: - return visc::CPU_OR_GPU_TARGET; + case hpvm::CPU_OR_GPU_TARGET: + return hpvm::CPU_OR_GPU_TARGET; default: assert(false && "Unknown Target\n"); } return T; } -// This functions add the hint as metadata in visc code -void addHint(Function *F, visc::Target T) { +// This functions add the hint as metadata in hpvm code +void addHint(Function *F, hpvm::Target T) { // Get Module Module *M = F->getParent(); DEBUG(errs() << "Set preferred target for " << F->getName() << ": "); @@ -434,17 +434,17 @@ void addHint(Function *F, visc::Target T) { // Based on the hint, get the hint metadata NamedMDNode *HintNode; switch (T) { - case visc::GPU_TARGET: + case hpvm::GPU_TARGET: DEBUG(errs() << "GPU Target\n"); - HintNode = M->getOrInsertNamedMetadata("visc_hint_gpu"); + HintNode = M->getOrInsertNamedMetadata("hpvm_hint_gpu"); break; - case visc::CPU_TARGET: + case hpvm::CPU_TARGET: DEBUG(errs() << "CPU Target\n"); - HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu"); + HintNode = M->getOrInsertNamedMetadata("hpvm_hint_cpu"); break; - case visc::CPU_OR_GPU_TARGET: + case hpvm::CPU_OR_GPU_TARGET: DEBUG(errs() << "CPU or GPU Target\n"); - HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu_gpu"); + HintNode = M->getOrInsertNamedMetadata("hpvm_hint_cpu_gpu"); break; default: llvm_unreachable("Unsupported Target Hint!"); @@ -457,8 +457,8 @@ void addHint(Function *F, visc::Target T) { HintNode->addOperand(N); } -// This function removes the hint as metadata in visc code -void removeHint(Function *F, 
visc::Target T) { +// This function removes the hint as metadata in hpvm code +void removeHint(Function *F, hpvm::Target T) { // Get Module Module *M = F->getParent(); DEBUG(errs() << "Remove preferred target for " << F->getName() << ": " << T @@ -467,14 +467,14 @@ void removeHint(Function *F, visc::Target T) { // Based on the hint, get the hint metadata NamedMDNode *HintNode; switch (T) { - case visc::GPU_TARGET: - HintNode = M->getOrInsertNamedMetadata("visc_hint_gpu"); + case hpvm::GPU_TARGET: + HintNode = M->getOrInsertNamedMetadata("hpvm_hint_gpu"); break; - case visc::CPU_OR_GPU_TARGET: - HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu_gpu"); + case hpvm::CPU_OR_GPU_TARGET: + HintNode = M->getOrInsertNamedMetadata("hpvm_hint_cpu_gpu"); break; - case visc::CPU_TARGET: - HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu"); + case hpvm::CPU_TARGET: + HintNode = M->getOrInsertNamedMetadata("hpvm_hint_cpu"); break; default: llvm_unreachable("Unsupported Target Hint!"); @@ -501,7 +501,7 @@ void removeHint(Function *F, visc::Target T) { } } -visc::Target getPreferredTarget(Function *F) { +hpvm::Target getPreferredTarget(Function *F) { DEBUG(errs() << "Finding preferred target for " << F->getName() << "\n"); Module *M = F->getParent(); @@ -517,16 +517,16 @@ visc::Target getPreferredTarget(Function *F) { return false; }; - if (FoundPrefTarget("visc_hint_cpu")) - return visc::CPU_TARGET; - if (FoundPrefTarget("visc_hint_gpu")) - return visc::GPU_TARGET; - if (FoundPrefTarget("visc_hint_cpu_gpu")) - return visc::CPU_OR_GPU_TARGET; + if (FoundPrefTarget("hpvm_hint_cpu")) + return hpvm::CPU_TARGET; + if (FoundPrefTarget("hpvm_hint_gpu")) + return hpvm::GPU_TARGET; + if (FoundPrefTarget("hpvm_hint_cpu_gpu")) + return hpvm::CPU_OR_GPU_TARGET; - return visc::None; + return hpvm::None; } -} // namespace viscUtils +} // namespace hpvmUtils -#endif // VISC_UTILS_HEADER +#endif // HPVM_UTILS_HEADER diff --git a/hpvm/include/SupportVISC/VISCTimer.h 
b/hpvm/include/SupportVISC/VISCTimer.h deleted file mode 100644 index ce3dc8a5e0f7c77ff06fec5857f223ca4f0e142f..0000000000000000000000000000000000000000 --- a/hpvm/include/SupportVISC/VISCTimer.h +++ /dev/null @@ -1,151 +0,0 @@ -//===------------ VISCTimer.h - Header file for "VISC Timer API" ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef VISC_TIMER_HEADER -#define VISC_TIMER_HEADER - -/************************** Timer Routines ***************************/ -extern "C" { - -/* A time or duration. */ -//#if _POSIX_VERSION >= 200112L -typedef unsigned long long visc_Timestamp; /* time in microseconds */ -//#else -//# error "Timestamps not implemented" -//#endif - -enum visc_TimerState { - visc_Timer_STOPPED, - visc_Timer_RUNNING, -}; - -struct visc_Timer { - enum visc_TimerState state; - visc_Timestamp elapsed; /* Amount of time elapsed so far */ - visc_Timestamp init; /* Beginning of the current time interval, - * if state is RUNNING. End of the last - * recorded time interfal otherwise. */ -}; - -/* Reset a timer. - * Use this to initialize a timer or to clear - * its elapsed time. The reset timer is stopped. - */ -void visc_ResetTimer(struct visc_Timer *timer); - -/* Start a timer. The timer is set to RUNNING mode and - * time elapsed while the timer is running is added to - * the timer. - * The timer should not already be running. - */ -void visc_StartTimer(struct visc_Timer *timer); - -/* Stop a timer. - * This stops adding elapsed time to the timer. - * The timer should not already be stopped. - */ -void visc_StopTimer(struct visc_Timer *timer); - -/* Get the elapsed time in seconds. */ -double visc_GetElapsedTime(struct visc_Timer *timer); - -/* Execution time is assigned to one of these categories. 
*/ -enum visc_TimerID { - visc_TimerID_NONE = 0, - visc_TimerID_IO, /* Time spent in input/output */ - visc_TimerID_KERNEL, /* Time spent computing on the device, - * recorded asynchronously */ - visc_TimerID_COPY, /* Time spent synchronously moving data - * to/from device and allocating/freeing - * memory on the device */ - visc_TimerID_DRIVER, /* Time spent in the host interacting with the - * driver, primarily for recording the time - * spent queueing asynchronous operations */ - visc_TimerID_COPY_ASYNC, /* Time spent in asynchronous transfers */ - visc_TimerID_COMPUTE, /* Time for all program execution other - * than parsing command line arguments, - * I/O, kernel, and copy */ - visc_TimerID_OVERLAP, /* Time double-counted in asynchronous and - * host activity: automatically filled in, - * not intended for direct usage */ - // GPU FUNCTION - visc_TimerID_INIT_CTX, - visc_TimerID_CLEAR_CTX, - visc_TimerID_COPY_SCALAR, - visc_TimerID_COPY_PTR, - visc_TimerID_MEM_FREE, - visc_TimerID_READ_OUTPUT, - visc_TimerID_SETUP, - visc_TimerID_MEM_TRACK, - visc_TimerID_MEM_UNTRACK, - visc_TimerID_MISC, - // LAUNCH FUNCTION - visc_TimerID_PTHREAD_CREATE, - visc_TimerID_ARG_PACK, - visc_TimerID_ARG_UNPACK, - visc_TimerID_COMPUTATION, - visc_TimerID_OUTPUT_PACK, - visc_TimerID_OUTPUT_UNPACK, - - visc_TimerID_LAST /* Number of timer IDs */ -}; - -/* Dynamic list of asynchronously tracked times between events */ -struct visc_async_time_marker_list { - char *label; // actually just a pointer to a string - enum visc_TimerID timerID; /* The ID to which the interval beginning - * with this marker should be attributed */ - void *marker; - // cudaEvent_t marker; /* The driver event for this marker */ - struct visc_async_time_marker_list *next; -}; - -struct visc_SubTimer { - char *label; - struct visc_Timer timer; - struct visc_SubTimer *next; -}; - -struct visc_SubTimerList { - struct visc_SubTimer *current; - struct visc_SubTimer *subtimer_list; -}; - -/* A set of timers for 
recording execution times. */ -struct visc_TimerSet { - enum visc_TimerID current; - struct visc_async_time_marker_list *async_markers; - visc_Timestamp async_begin; - visc_Timestamp wall_begin; - struct visc_Timer timers[visc_TimerID_LAST]; - struct visc_SubTimerList *sub_timer_list[visc_TimerID_LAST]; -}; - -/* Reset all timers in the set. */ -void visc_InitializeTimerSet(struct visc_TimerSet *timers); - -void visc_AddSubTimer(struct visc_TimerSet *timers, char *label, - enum visc_TimerID visc_Category); - -/* Select which timer the next interval of time should be accounted - * to. The selected timer is started and other timers are stopped. - * Using visc_TimerID_NONE stops all timers. */ -inline void visc_SwitchToTimer(struct visc_TimerSet *timers, - enum visc_TimerID timer); - -void visc_SwitchToSubTimer(struct visc_TimerSet *timers, char *label, - enum visc_TimerID category); - -/* Print timer values to standard output. */ -void visc_PrintTimerSet(struct visc_TimerSet *timers); - -/* Release timer resources */ -void visc_DestroyTimerSet(struct visc_TimerSet *timers); -} -#endif // VISC_RT_HEADER diff --git a/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp b/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp index 058419f1dc80a8650e7a3b834090a88099741431..be3e6cae3dae775716fc3e2206879e978febddb0 100644 --- a/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp +++ b/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp @@ -10,8 +10,8 @@ #define DEBUG_TYPE "buildDFG" #include "BuildDFG/BuildDFG.h" -#include "SupportVISC/VISCHint.h" -#include "SupportVISC/VISCUtils.h" +#include "SupportHPVM/HPVMHint.h" +#include "SupportHPVM/HPVMUtils.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/ValueSymbolTable.h" @@ -35,7 +35,7 @@ bool BuildDFG::runOnModule(Module &M) { for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { Instruction *I = &*i; // Grab pointer to Instruction - if (isViscLaunchIntrinsic(I)) { + if (isHPVMLaunchIntrinsic(I)) { DEBUG(errs() << 
"------------ Found launch site --------------\n"); II = cast<IntrinsicInst>(I); @@ -43,7 +43,7 @@ bool BuildDFG::runOnModule(Module &M) { // Intrinsic Instruction has been initialized from this point on. Function *F = cast<Function>(II->getOperand(0)->stripPointerCasts()); - Root = DFInternalNode::Create(II, F, viscUtils::getPreferredTarget(F)); + Root = DFInternalNode::Create(II, F, hpvmUtils::getPreferredTarget(F)); Roots.push_back(Root); BuildGraph(Root, F); @@ -118,37 +118,37 @@ void BuildDFG::removeElementFromHandleToDFEdgeMap(Value *V) { HandleToDFEdgeMap.erase(V); } -// Returns true if instruction I is a visc launch intrinsic, false otherwise -bool BuildDFG::isViscLaunchIntrinsic(Instruction *I) { +// Returns true if instruction I is a hpvm launch intrinsic, false otherwise +bool BuildDFG::isHPVMLaunchIntrinsic(Instruction *I) { if (!isa<IntrinsicInst>(I)) return false; IntrinsicInst *II = cast<IntrinsicInst>(I); - return (II->getCalledFunction()->getName()).equals("llvm.visc.launch"); + return (II->getCalledFunction()->getName()).equals("llvm.hpvm.launch"); } -// Returns true if instruction I is a visc graph intrinsic, false otherwise -bool BuildDFG::isViscGraphIntrinsic(Instruction *I) { +// Returns true if instruction I is a hpvm graph intrinsic, false otherwise +bool BuildDFG::isHPVMGraphIntrinsic(Instruction *I) { if (!isa<IntrinsicInst>(I)) return false; IntrinsicInst *II = cast<IntrinsicInst>(I); - return (II->getCalledFunction()->getName()).startswith("llvm.visc.create") || - (II->getCalledFunction()->getName()).startswith("llvm.visc.bind"); + return (II->getCalledFunction()->getName()).startswith("llvm.hpvm.create") || + (II->getCalledFunction()->getName()).startswith("llvm.hpvm.bind"); } -// Returns true if instruction I is a visc query intrinsic, false otherwise -bool BuildDFG::isViscQueryIntrinsic(Instruction *I) { +// Returns true if instruction I is a hpvm query intrinsic, false otherwise +bool BuildDFG::isHPVMQueryIntrinsic(Instruction *I) { 
if (!isa<IntrinsicInst>(I)) return false; IntrinsicInst *II = cast<IntrinsicInst>(I); - return (II->getCalledFunction()->getName()).startswith("llvm.visc.get"); + return (II->getCalledFunction()->getName()).startswith("llvm.hpvm.get"); } -// Returns true if instruction I is a visc intrinsic, false otherwise -bool BuildDFG::isViscIntrinsic(Instruction *I) { +// Returns true if instruction I is a hpvm intrinsic, false otherwise +bool BuildDFG::isHPVMIntrinsic(Instruction *I) { if (!isa<IntrinsicInst>(I)) return false; IntrinsicInst *II = cast<IntrinsicInst>(I); - return (II->getCalledFunction()->getName()).startswith("llvm.visc"); + return (II->getCalledFunction()->getName()).startswith("llvm.hpvm"); } // Two types are "congruent" if they are identical, or if they are both @@ -163,7 +163,7 @@ bool BuildDFG::isTypeCongruent(Type *L, Type *R) { return PL->getAddressSpace() == PR->getAddressSpace(); } -// Handles all the createNodeXX visc intrinsics. +// Handles all the createNodeXX hpvm intrinsics. void BuildDFG::handleCreateNode(DFInternalNode *N, IntrinsicInst *II) { bool isInternalNode = false; @@ -173,7 +173,7 @@ void BuildDFG::handleCreateNode(DFInternalNode *N, IntrinsicInst *II) { // internal node for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { Instruction *I = &*i; // Grab pointer to Instruction - if (isViscGraphIntrinsic(I)) + if (isHPVMGraphIntrinsic(I)) isInternalNode = true; } @@ -196,14 +196,14 @@ void BuildDFG::handleCreateNode(DFInternalNode *N, IntrinsicInst *II) { // Create Internal DFNode, add it to the map and recursively build its // dataflow graph DFInternalNode *childDFNode = DFInternalNode::Create( - II, F, viscUtils::getPreferredTarget(F), N, numOfDim, dimLimits); + II, F, hpvmUtils::getPreferredTarget(F), N, numOfDim, dimLimits); N->addChildToDFGraph(childDFNode); HandleToDFNodeMap[II] = childDFNode; BuildGraph(childDFNode, F); } else { // Create Leaf DFnode and add it to the map. 
DFLeafNode *childDFNode = DFLeafNode::Create( - II, F, viscUtils::getPreferredTarget(F), N, numOfDim, dimLimits); + II, F, hpvmUtils::getPreferredTarget(F), N, numOfDim, dimLimits); N->addChildToDFGraph(childDFNode); HandleToDFNodeMap[II] = childDFNode; } @@ -336,11 +336,11 @@ void BuildDFG::handleBindOutput(DFInternalNode *N, IntrinsicInst *II) { void BuildDFG::BuildGraph(DFInternalNode *N, Function *F) { DEBUG(errs() << "FUNCTION: " << F->getName() << "\n"); - // TODO: Place checks for valid visc functions. For example one of the - // check can be that any function that contains visc dataflow graph + // TODO: Place checks for valid hpvm functions. For example one of the + // check can be that any function that contains hpvm dataflow graph // construction intrinsics should not have other llvm IR statements. - // Iterate over all the instructions of a function and look for visc + // Iterate over all the instructions of a function and look for hpvm // intrinsics. for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { Instruction *I = &*i; // Grab pointer to Instruction @@ -349,25 +349,25 @@ void BuildDFG::BuildGraph(DFInternalNode *N, Function *F) { DEBUG(errs() << "IntrinsicID = " << II->getIntrinsicID() << ": " << II->getCalledFunction()->getName() << "\n"); switch (II->getIntrinsicID()) { - case Intrinsic::visc_createNode: - case Intrinsic::visc_createNode1D: - case Intrinsic::visc_createNode2D: - case Intrinsic::visc_createNode3D: + case Intrinsic::hpvm_createNode: + case Intrinsic::hpvm_createNode1D: + case Intrinsic::hpvm_createNode2D: + case Intrinsic::hpvm_createNode3D: handleCreateNode(N, II); break; - case Intrinsic::visc_createEdge: + case Intrinsic::hpvm_createEdge: handleCreateEdge(N, II); break; - case Intrinsic::visc_bind_input: + case Intrinsic::hpvm_bind_input: handleBindInput(N, II); break; - case Intrinsic::visc_bind_output: + case Intrinsic::hpvm_bind_output: handleBindOutput(N, II); break; // TODO: Reconsider launch within a 
dataflow graph (recursion?) - case Intrinsic::visc_wait: - case Intrinsic::visc_launch: + case Intrinsic::hpvm_wait: + case Intrinsic::hpvm_launch: DEBUG(errs() << "Error: Launch/wait intrinsic used within a dataflow graph\n\t" << *II << "\n"); @@ -375,7 +375,7 @@ void BuildDFG::BuildGraph(DFInternalNode *N, Function *F) { default: DEBUG( - errs() << "Error: Invalid VISC Intrinsic inside Internal node!\n\t" + errs() << "Error: Invalid HPVM Intrinsic inside Internal node!\n\t" << *II << "\n"); break; } diff --git a/hpvm/lib/Transforms/CMakeLists.txt b/hpvm/lib/Transforms/CMakeLists.txt index 68724684e56648d307df52624e47ed7393bfd3f9..5c9b8b9fe026ea5612caa124535e02d28d619c53 100644 --- a/hpvm/lib/Transforms/CMakeLists.txt +++ b/hpvm/lib/Transforms/CMakeLists.txt @@ -2,5 +2,5 @@ add_subdirectory(BuildDFG) add_subdirectory(ClearDFG) add_subdirectory(DFG2LLVM_NVPTX) add_subdirectory(DFG2LLVM_X86) -add_subdirectory(GenVISC) +add_subdirectory(GenHPVM) add_subdirectory(LocalMem) diff --git a/hpvm/lib/Transforms/ClearDFG/ClearDFG.cpp b/hpvm/lib/Transforms/ClearDFG/ClearDFG.cpp index 6dae9e6977d31a0b62a9fa903966ec10810a2f71..c23043e7829a8947a995f7ad97688091c46cf23d 100644 --- a/hpvm/lib/Transforms/ClearDFG/ClearDFG.cpp +++ b/hpvm/lib/Transforms/ClearDFG/ClearDFG.cpp @@ -18,7 +18,7 @@ using namespace llvm; using namespace builddfg; -// STATISTIC(IntrinsicCounter, "Counts number of visc intrinsics greeted"); +// STATISTIC(IntrinsicCounter, "Counts number of hpvm intrinsics greeted"); namespace { @@ -101,8 +101,8 @@ bool ClearDFG::runOnModule(Module &M) { // BuildDFG::HandleToDFNode &HandleToDFNodeMap = DFG.getHandleToDFNodeMap(); // BuildDFG::HandleToDFEdge &HandleToDFEdgeMap = DFG.getHandleToDFEdgeMap(); - Function *VI = M.getFunction("llvm.visc.init"); - assert(VI->hasOneUse() && "More than one use of llvm.visc.init\n"); + Function *VI = M.getFunction("llvm.hpvm.init"); + assert(VI->hasOneUse() && "More than one use of llvm.hpvm.init\n"); for (Value::user_iterator ui = 
VI->user_begin(), ue = VI->user_end(); ui != ue; ui++) { Instruction *I = dyn_cast<Instruction>(*ui); @@ -111,8 +111,8 @@ bool ClearDFG::runOnModule(Module &M) { VI->replaceAllUsesWith(UndefValue::get(VI->getType())); VI->eraseFromParent(); - Function *VC = M.getFunction("llvm.visc.cleanup"); - assert(VC->hasOneUse() && "More than one use of llvm.visc.cleanup\n"); + Function *VC = M.getFunction("llvm.hpvm.cleanup"); + assert(VC->hasOneUse() && "More than one use of llvm.hpvm.cleanup\n"); for (Value::user_iterator ui = VC->user_begin(), ue = VC->user_end(); ui != ue; ui++) { Instruction *I = dyn_cast<Instruction>(*ui); diff --git a/hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp index 8a36e3b8af5c031715d1e341f3ac166501c0a5b9..f582a9ab6a4510b5d403d0709f2a06d0339d5a93 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp +++ b/hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp @@ -15,40 +15,39 @@ #define SHARED_ADDRSPACE 3 #define DEBUG_TYPE "DFG2LLVM_NVPTX" +#include "SupportHPVM/DFG2LLVM.h" +#include "SupportHPVM/HPVMTimer.h" +#include "SupportHPVM/HPVMUtils.h" +#include "llvm-c/Core.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" #include "llvm/IR/InstIterator.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" -#include "llvm/Support/SourceMgr.h" +#include "llvm/Pass.h" #include "llvm/Support/FileSystem.h" -#include "llvm/IR/Attributes.h" -#include "llvm-c/Core.h" -#include "SupportVISC/VISCTimer.h" -#include "SupportVISC/DFG2LLVM.h" -#include "SupportVISC/VISCUtils.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include 
"llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" -#include "llvm/Support/ToolOutputFile.h" #include "llvm/IR/UseListOrder.h" - +#include "llvm/Support/ToolOutputFile.h" #include <sstream> using namespace llvm; using namespace builddfg; using namespace dfg2llvm; -using namespace viscUtils; +using namespace hpvmUtils; -// VISC Command line option to use timer or not -static cl::opt<bool> -VISCTimer_NVPTX("visc-timers-ptx", cl::desc("Enable visc timers")); +// HPVM Command line option to use timer or not +static cl::opt<bool> HPVMTimer_NVPTX("hpvm-timers-ptx", + cl::desc("Enable hpvm timers")); namespace { // Helper class declarations @@ -57,94 +56,88 @@ namespace { // in bytes. Would have preferred to use tuple but support not yet available class OutputPtr { public: - OutputPtr(Value* _h_ptr, Value* _d_ptr, Value* _bytes) - : h_ptr(_h_ptr), d_ptr(_d_ptr), bytes(_bytes) {} + OutputPtr(Value *_h_ptr, Value *_d_ptr, Value *_bytes) + : h_ptr(_h_ptr), d_ptr(_d_ptr), bytes(_bytes) {} - Value* h_ptr; - Value* d_ptr; - Value* bytes; + Value *h_ptr; + Value *d_ptr; + Value *bytes; }; // Class to maintain important kernel info required for generating runtime // calls class Kernel { public: - Kernel(Function* _KF, DFLeafNode* _KLeafNode, std::map<unsigned, unsigned> _inArgMap = - std::map<unsigned, unsigned>(), - std::map<unsigned, std::pair<Value*, unsigned> > _sharedInArgMap = - std::map<unsigned, std::pair<Value*, unsigned> >(), - std::vector<unsigned> _outArgMap = std::vector<unsigned>(), - unsigned _gridDim = 0, std::vector<Value*> _globalWGSize = std::vector<Value*>(), - unsigned _blockDim = 0, std::vector<Value*> _localWGSize = std::vector<Value*>()) - : KernelFunction(_KF), KernelLeafNode(_KLeafNode), inArgMap(_inArgMap), - sharedInArgMap(_sharedInArgMap), outArgMap(_outArgMap), gridDim(_gridDim), - globalWGSize(_globalWGSize), blockDim(_blockDim), 
localWGSize(_localWGSize) { - - assert(gridDim == globalWGSize.size() - && "gridDim should be same as the size of vector globalWGSize"); - assert(blockDim == localWGSize.size() - && "blockDim should be same as the size of vector localWGSize"); + Kernel( + Function *_KF, DFLeafNode *_KLeafNode, + std::map<unsigned, unsigned> _inArgMap = std::map<unsigned, unsigned>(), + std::map<unsigned, std::pair<Value *, unsigned>> _sharedInArgMap = + std::map<unsigned, std::pair<Value *, unsigned>>(), + std::vector<unsigned> _outArgMap = std::vector<unsigned>(), + unsigned _gridDim = 0, + std::vector<Value *> _globalWGSize = std::vector<Value *>(), + unsigned _blockDim = 0, + std::vector<Value *> _localWGSize = std::vector<Value *>()) + : KernelFunction(_KF), KernelLeafNode(_KLeafNode), inArgMap(_inArgMap), + sharedInArgMap(_sharedInArgMap), outArgMap(_outArgMap), + gridDim(_gridDim), globalWGSize(_globalWGSize), blockDim(_blockDim), + localWGSize(_localWGSize) { + + assert(gridDim == globalWGSize.size() && + "gridDim should be same as the size of vector globalWGSize"); + assert(blockDim == localWGSize.size() && + "blockDim should be same as the size of vector localWGSize"); } - Function* KernelFunction; - DFLeafNode* KernelLeafNode; + Function *KernelFunction; + DFLeafNode *KernelLeafNode; std::map<unsigned, unsigned> inArgMap; // Map for shared memory arguments - std::map<unsigned, std::pair<Value*, unsigned> > sharedInArgMap; + std::map<unsigned, std::pair<Value *, unsigned>> sharedInArgMap; // Fields for (potential) allocation node - DFLeafNode* AllocationNode; - Function* AllocationFunction; + DFLeafNode *AllocationNode; + Function *AllocationFunction; std::map<unsigned, unsigned> allocInArgMap; std::vector<unsigned> outArgMap; unsigned gridDim; - std::vector<Value*> globalWGSize; + std::vector<Value *> globalWGSize; unsigned blockDim; - std::vector<Value*> localWGSize; + std::vector<Value *> localWGSize; std::vector<int> localDimMap; - std::map<unsigned, unsigned> 
&getInArgMap() { - return inArgMap; - } - void setInArgMap(std::map<unsigned, unsigned> map) { - inArgMap = map; - } + std::map<unsigned, unsigned> &getInArgMap() { return inArgMap; } + void setInArgMap(std::map<unsigned, unsigned> map) { inArgMap = map; } - std::map<unsigned, std::pair<Value*, unsigned> > &getSharedInArgMap() { + std::map<unsigned, std::pair<Value *, unsigned>> &getSharedInArgMap() { return sharedInArgMap; } - void setSharedInArgMap(std::map<unsigned, std::pair<Value*, unsigned> > map) { + void setSharedInArgMap(std::map<unsigned, std::pair<Value *, unsigned>> map) { sharedInArgMap = map; } - std::vector<unsigned> &getOutArgMap() { - return outArgMap; - } - void setOutArgMap(std::vector<unsigned> map) { - outArgMap = map; - } + std::vector<unsigned> &getOutArgMap() { return outArgMap; } + void setOutArgMap(std::vector<unsigned> map) { outArgMap = map; } - void setLocalWGSize(std::vector<Value*> V) { - localWGSize = V; - } + void setLocalWGSize(std::vector<Value *> V) { localWGSize = V; } - bool hasLocalWG() const { - return blockDim != 0; - } + bool hasLocalWG() const { return blockDim != 0; } }; // Helper function declarations -static bool canBePromoted(Argument* arg, Function* F); -static void getExecuteNodeParams(Module &M, Value* &, Value* &, Value* &, Kernel*, - ValueToValueMapTy&, Instruction*); -static Value* genWorkGroupPtr(Module &M, std::vector<Value*>, ValueToValueMapTy&, - Instruction*, const Twine& WGName = "WGSize"); -static std::string getPTXFilename(const Module&); -static std::string getFilenameFromModule(const Module& M); +static bool canBePromoted(Argument *arg, Function *F); +static void getExecuteNodeParams(Module &M, Value *&, Value *&, Value *&, + Kernel *, ValueToValueMapTy &, Instruction *); +static Value *genWorkGroupPtr(Module &M, std::vector<Value *>, + ValueToValueMapTy &, Instruction *, + const Twine &WGName = "WGSize"); +static std::string getPTXFilename(const Module &); +static std::string 
getFilenameFromModule(const Module &M); static void changeDataLayout(Module &); static void changeTargetTriple(Module &); static void findReturnInst(Function *, std::vector<ReturnInst *> &); -static void findIntrinsicInst(Function *, Intrinsic::ID, std::vector<IntrinsicInst *> &); +static void findIntrinsicInst(Function *, Intrinsic::ID, + std::vector<IntrinsicInst *> &); static AtomicRMWInst::BinOp getAtomicOp(Intrinsic::ID); static std::string getAtomicOpName(Intrinsic::ID); @@ -154,7 +147,6 @@ struct DFG2LLVM_NVPTX : public DFG2LLVM { DFG2LLVM_NVPTX() : DFG2LLVM(ID) {} private: - public: bool runOnModule(Module &M); }; @@ -163,57 +155,60 @@ public: class CGT_NVPTX : public CodeGenTraversal { private: - //Member variables + // Member variables std::unique_ptr<Module> KernelM; - DFNode* KernelLaunchNode = NULL; - Kernel* kernel; - - // VISC Runtime API - FunctionCallee llvm_visc_ocl_launch; - FunctionCallee llvm_visc_ocl_wait; - FunctionCallee llvm_visc_ocl_initContext; - FunctionCallee llvm_visc_ocl_clearContext; - FunctionCallee llvm_visc_ocl_argument_shared; - FunctionCallee llvm_visc_ocl_argument_scalar; - FunctionCallee llvm_visc_ocl_argument_ptr; - FunctionCallee llvm_visc_ocl_output_ptr; - FunctionCallee llvm_visc_ocl_free; - FunctionCallee llvm_visc_ocl_getOutput; - FunctionCallee llvm_visc_ocl_executeNode; - - //Functions + DFNode *KernelLaunchNode = NULL; + Kernel *kernel; + + // HPVM Runtime API + FunctionCallee llvm_hpvm_ocl_launch; + FunctionCallee llvm_hpvm_ocl_wait; + FunctionCallee llvm_hpvm_ocl_initContext; + FunctionCallee llvm_hpvm_ocl_clearContext; + FunctionCallee llvm_hpvm_ocl_argument_shared; + FunctionCallee llvm_hpvm_ocl_argument_scalar; + FunctionCallee llvm_hpvm_ocl_argument_ptr; + FunctionCallee llvm_hpvm_ocl_output_ptr; + FunctionCallee llvm_hpvm_ocl_free; + FunctionCallee llvm_hpvm_ocl_getOutput; + FunctionCallee llvm_hpvm_ocl_executeNode; + + // Functions std::string getKernelsModuleName(Module &M); - void fixValueAddrspace(Value* V, 
unsigned addrspace); - std::vector<unsigned> globalToConstantMemoryOpt(std::vector<unsigned>*, Function*); - Function* changeArgAddrspace(Function* F, std::vector<unsigned> &Ags, unsigned i); - void addCLMetadata(Function* F); - Function* transformFunctionToVoid(Function* F); - void insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& FileName); + void fixValueAddrspace(Value *V, unsigned addrspace); + std::vector<unsigned> globalToConstantMemoryOpt(std::vector<unsigned> *, + Function *); + Function *changeArgAddrspace(Function *F, std::vector<unsigned> &Ags, + unsigned i); + void addCLMetadata(Function *F); + Function *transformFunctionToVoid(Function *F); + void insertRuntimeCalls(DFInternalNode *N, Kernel *K, const Twine &FileName); // Virtual Functions void init() { - VISCTimer = VISCTimer_NVPTX; + HPVMTimer = HPVMTimer_NVPTX; TargetName = "NVPTX"; } void initRuntimeAPI(); - void codeGen(DFInternalNode* N); - void codeGen(DFLeafNode* N); + void codeGen(DFInternalNode *N); + void codeGen(DFLeafNode *N); public: - // Constructor - CGT_NVPTX(Module &_M, BuildDFG &_DFG) : CodeGenTraversal(_M, _DFG), KernelM(CloneModule(_M)) { + CGT_NVPTX(Module &_M, BuildDFG &_DFG) + : CodeGenTraversal(_M, _DFG), KernelM(CloneModule(_M)) { init(); initRuntimeAPI(); - errs() << "Old module pointer: " << &_M << "\n"; - errs() << "New module pointer: " << KernelM.get() << "\n"; + DEBUG(errs() << "Old module pointer: " << &_M << "\n"); + DEBUG(errs() << "New module pointer: " << KernelM.get() << "\n"); - // Copying instead of creating new, in order to preserve required info (metadata) - // Remove functions, global variables and aliases - std::vector<GlobalVariable*> GVVect; + // Copying instead of creating new, in order to preserve required info + // (metadata) Remove functions, global variables and aliases + std::vector<GlobalVariable *> GVVect; for (Module::global_iterator mi = KernelM->global_begin(), - me = KernelM->global_end(); (mi != me); ++mi) { - GlobalVariable* GV = 
&*mi; + me = KernelM->global_end(); + (mi != me); ++mi) { + GlobalVariable *GV = &*mi; GVVect.push_back(GV); } for (auto *GV : GVVect) { @@ -221,10 +216,10 @@ public: GV->eraseFromParent(); } - std::vector<Function*> FuncVect; - for (Module::iterator mi = KernelM->begin(), - me = KernelM->end(); (mi != me); ++mi) { - Function* F = &*mi; + std::vector<Function *> FuncVect; + for (Module::iterator mi = KernelM->begin(), me = KernelM->end(); + (mi != me); ++mi) { + Function *F = &*mi; FuncVect.push_back(F); } for (auto *F : FuncVect) { @@ -232,10 +227,11 @@ public: F->eraseFromParent(); } - std::vector<GlobalAlias*> GAVect; + std::vector<GlobalAlias *> GAVect; for (Module::alias_iterator mi = KernelM->alias_begin(), - me = KernelM->alias_end(); (mi != me); ++mi) { - GlobalAlias* GA = &*mi; + me = KernelM->alias_end(); + (mi != me); ++mi) { + GlobalAlias *GA = &*mi; GAVect.push_back(GA); } for (auto *GA : GAVect) { @@ -246,73 +242,69 @@ public: changeDataLayout(*KernelM); changeTargetTriple(*KernelM); - DEBUG(errs() << *KernelM); - } void writeKernelsModule(); }; -// Initialize the VISC runtime API. This makes it easier to insert these calls +// Initialize the HPVM runtime API. 
This makes it easier to insert these calls void CGT_NVPTX::initRuntimeAPI() { // Load Runtime API Module SMDiagnostic Err; - char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT"); + char *LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT"); assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!"); Twine llvmSrcRoot = LLVM_SRC_ROOT; - Twine runtimeAPI = llvmSrcRoot + "/../build/tools/hpvm/projects/visc-rt/visc-rt.bc"; + Twine runtimeAPI = + llvmSrcRoot + "/../build/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc"; runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext()); - if(runtimeModule == nullptr) { + if (runtimeModule == nullptr) { DEBUG(errs() << Err.getMessage() << " " << runtimeAPI << "\n"); assert(false && "couldn't parse runtime"); - } - else - DEBUG(errs() << "Successfully loaded visc-rt API module\n"); + } else + DEBUG(errs() << "Successfully loaded hpvm-rt API module\n"); // Get or insert the global declarations for launch/wait functions - DECLARE(llvm_visc_ocl_launch); - DECLARE(llvm_visc_ocl_wait); - DECLARE(llvm_visc_ocl_initContext); - DECLARE(llvm_visc_ocl_clearContext); - DECLARE(llvm_visc_ocl_argument_shared); - DECLARE(llvm_visc_ocl_argument_scalar); - DECLARE(llvm_visc_ocl_argument_ptr); - DECLARE(llvm_visc_ocl_output_ptr); - DECLARE(llvm_visc_ocl_free); - DECLARE(llvm_visc_ocl_getOutput); - DECLARE(llvm_visc_ocl_executeNode); + DECLARE(llvm_hpvm_ocl_launch); + DECLARE(llvm_hpvm_ocl_wait); + DECLARE(llvm_hpvm_ocl_initContext); + DECLARE(llvm_hpvm_ocl_clearContext); + DECLARE(llvm_hpvm_ocl_argument_shared); + DECLARE(llvm_hpvm_ocl_argument_scalar); + DECLARE(llvm_hpvm_ocl_argument_ptr); + DECLARE(llvm_hpvm_ocl_output_ptr); + DECLARE(llvm_hpvm_ocl_free); + DECLARE(llvm_hpvm_ocl_getOutput); + DECLARE(llvm_hpvm_ocl_executeNode); // Get or insert timerAPI functions as well if you plan to use timers initTimerAPI(); // Insert init context in main DEBUG(errs() << "Gen Code to initialize NVPTX Timer\n"); - Function* VI = 
M.getFunction("llvm.visc.init"); - assert(VI->getNumUses() == 1 && "__visc__init should only be used once"); + Function *VI = M.getFunction("llvm.hpvm.init"); + assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once"); InitCall = cast<Instruction>(*VI->user_begin()); initializeTimerSet(InitCall); - switchToTimer(visc_TimerID_INIT_CTX, InitCall); - CallInst::Create(llvm_visc_ocl_initContext, - ArrayRef<Value*>(getTargetID(M, visc::GPU_TARGET)), - "", InitCall); - switchToTimer(visc_TimerID_NONE, InitCall); + switchToTimer(hpvm_TimerID_INIT_CTX, InitCall); + CallInst::Create(llvm_hpvm_ocl_initContext, + ArrayRef<Value *>(getTargetID(M, hpvm::GPU_TARGET)), "", + InitCall); + switchToTimer(hpvm_TimerID_NONE, InitCall); - // Insert print instruction at visc exit + // Insert print instruction at hpvm exit DEBUG(errs() << "Gen Code to print NVPTX Timer\n"); - Function* VC = M.getFunction("llvm.visc.cleanup"); + Function *VC = M.getFunction("llvm.hpvm.cleanup"); DEBUG(errs() << *VC << "\n"); - assert(VC->getNumUses() == 1 && "__visc__clear should only be used once"); + assert(VC->getNumUses() == 1 && "__hpvm__clear should only be used once"); CleanupCall = cast<Instruction>(*VC->user_begin()); printTimerSet(CleanupCall); - - } // Generate Code to call the kernel @@ -320,36 +312,37 @@ void CGT_NVPTX::initRuntimeAPI() { // used to generate a function to associate with this leaf node. The function // is responsible for all the memory allocation/transfer and invoking the // kernel call on the device -void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& FileName) { +void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K, + const Twine &FileName) { // Check if clone already exists. If it does, it means we have visited this // function before. 
-// assert(N->getGenFunc() == NULL && "Code already generated for this node"); + // assert(N->getGenFunc() == NULL && "Code already generated for this node"); - assert(N->getGenFuncForTarget(visc::GPU_TARGET) == NULL && + assert(N->getGenFuncForTarget(hpvm::GPU_TARGET) == NULL && "Code already generated for this node"); // Useful values - Value* True = ConstantInt::get(Type::getInt1Ty(M.getContext()), 1); - Value* False = ConstantInt::get(Type::getInt1Ty(M.getContext()), 0); + Value *True = ConstantInt::get(Type::getInt1Ty(M.getContext()), 1); + Value *False = ConstantInt::get(Type::getInt1Ty(M.getContext()), 0); // If kernel struct has not been initialized with kernel function, then fail assert(K != NULL && "No kernel found!!"); DEBUG(errs() << "Generating kernel call code\n"); - Function* F = N->getFuncPointer(); - + Function *F = N->getFuncPointer(); // Create of clone of F with no instructions. Only the type is the same as F // without the extra arguments. - Function* F_X86; + Function *F_X86; // Clone the function, if we are seeing this function for the first time. We // only need a clone in terms of type. ValueToValueMapTy VMap; // Create new function with the same type - F_X86 = Function::Create(F->getFunctionType(), F->getLinkage(), F->getName(), &M); + F_X86 = + Function::Create(F->getFunctionType(), F->getLinkage(), F->getName(), &M); // Loop over the arguments, copying the names of arguments over. 
Function::arg_iterator dest_iterator = F_X86->arg_begin(); @@ -362,26 +355,25 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi // Add a basic block to this empty function BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F_X86); - ReturnInst* RI = ReturnInst::Create(M.getContext(), - UndefValue::get(F_X86->getReturnType()), BB); + ReturnInst *RI = ReturnInst::Create( + M.getContext(), UndefValue::get(F_X86->getReturnType()), BB); // FIXME: Adding Index and Dim arguments are probably not required except // for consistency purpose (DFG2LLVM_X86 does assume that all leaf nodes do // have those arguments) // Add Index and Dim arguments except for the root node - if(!N->isRoot() && !N->getParent()->isChildGraphStreaming()) + if (!N->isRoot() && !N->getParent()->isChildGraphStreaming()) F_X86 = addIdxDimArgs(F_X86); BB = &*F_X86->begin(); RI = cast<ReturnInst>(BB->getTerminator()); - //Add the generated function info to DFNode -// N->setGenFunc(F_X86, visc::CPU_TARGET); - N->addGenFunc(F_X86, visc::GPU_TARGET, true); - errs() << "Added GPUGenFunc: " << F_X86->getName() << " for node " - << N->getFuncPointer()->getName() << "\n"; - + // Add the generated function info to DFNode + // N->setGenFunc(F_X86, hpvm::CPU_TARGET); + N->addGenFunc(F_X86, hpvm::GPU_TARGET, true); + DEBUG(errs() << "Added GPUGenFunc: " << F_X86->getName() << " for node " + << N->getFuncPointer()->getName() << "\n"); // Loop over the arguments, to create the VMap dest_iterator = F_X86->arg_begin(); @@ -414,51 +406,53 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi break; } - assert(C->isDummyNode() == false && "Internal Node only contains dummy nodes!"); + assert(C->isDummyNode() == false && "Internal Node only contains dummy + nodes!"); Function* CF = C->getFuncPointer(); */ - Function* KF = K->KernelLeafNode->getFuncPointer(); + Function *KF = K->KernelLeafNode->getFuncPointer(); // Initialize context - //DEBUG(errs() 
<< "Initializing context" << "\n"); - //CallInst::Create(llvm_visc_ocl_initContext, None, "", RI); + // DEBUG(errs() << "Initializing context" << "\n"); + // CallInst::Create(llvm_hpvm_ocl_initContext, None, "", RI); - DEBUG(errs() << "Initializing commandQ" << "\n"); + DEBUG(errs() << "Initializing commandQ" + << "\n"); // Initialize command queue - switchToTimer(visc_TimerID_SETUP, InitCall); - Value* fileStr = getStringPointer(FileName, InitCall, "Filename"); + switchToTimer(hpvm_TimerID_SETUP, InitCall); + Value *fileStr = getStringPointer(FileName, InitCall, "Filename"); DEBUG(errs() << "Kernel Filename constant: " << *fileStr << "\n"); - DEBUG(errs() << "Generating code for kernel - " << K->KernelFunction->getName()<< "\n"); - Value* kernelStr = getStringPointer(K->KernelFunction->getName(), InitCall,"KernelName"); - - Value* LaunchInstArgs[] = {fileStr, kernelStr}; - - DEBUG(errs() << "Inserting launch call" << "\n"); - CallInst* NVPTX_Ctx = CallInst::Create(llvm_visc_ocl_launch, - ArrayRef<Value*>(LaunchInstArgs, 2), - "graph"+KF->getName(), - InitCall); + DEBUG(errs() << "Generating code for kernel - " + << K->KernelFunction->getName() << "\n"); + Value *kernelStr = + getStringPointer(K->KernelFunction->getName(), InitCall, "KernelName"); + + Value *LaunchInstArgs[] = {fileStr, kernelStr}; + + DEBUG(errs() << "Inserting launch call" + << "\n"); + CallInst *NVPTX_Ctx = CallInst::Create(llvm_hpvm_ocl_launch, + ArrayRef<Value *>(LaunchInstArgs, 2), + "graph" + KF->getName(), InitCall); DEBUG(errs() << *NVPTX_Ctx << "\n"); - GraphIDAddr = new GlobalVariable(M, - NVPTX_Ctx->getType(), - false, + GraphIDAddr = new GlobalVariable(M, NVPTX_Ctx->getType(), false, GlobalValue::CommonLinkage, Constant::getNullValue(NVPTX_Ctx->getType()), - "graph"+KF->getName()+".addr"); + "graph" + KF->getName() + ".addr"); DEBUG(errs() << "Store at: " << *GraphIDAddr << "\n"); - StoreInst* SI = new StoreInst(NVPTX_Ctx, GraphIDAddr, InitCall); + StoreInst *SI = new 
StoreInst(NVPTX_Ctx, GraphIDAddr, InitCall); DEBUG(errs() << *SI << "\n"); - switchToTimer(visc_TimerID_NONE, InitCall); - switchToTimer(visc_TimerID_SETUP, RI); - Value* GraphID = new LoadInst(GraphIDAddr, "graph."+KF->getName(), RI); + switchToTimer(hpvm_TimerID_NONE, InitCall); + switchToTimer(hpvm_TimerID_SETUP, RI); + Value *GraphID = new LoadInst(GraphIDAddr, "graph." + KF->getName(), RI); - // Iterate over the required input edges of the node and use the visc-rt API + // Iterate over the required input edges of the node and use the hpvm-rt API // to set inputs - DEBUG(errs() << "Iterate over input edges of node and insert visc api\n"); + DEBUG(errs() << "Iterate over input edges of node and insert hpvm api\n"); std::vector<OutputPtr> OutputPointers; - // Vector to hold the device memory object that need to be cleared before we release - // context - std::vector<Value*> DevicePointers; + // Vector to hold the device memory object that need to be cleared before we + // release context + std::vector<Value *> DevicePointers; std::map<unsigned, unsigned> &kernelInArgMap = K->getInArgMap(); /* @@ -470,133 +464,134 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi */ - for(auto &InArgMapPair : kernelInArgMap) { + for (auto &InArgMapPair : kernelInArgMap) { unsigned i = InArgMapPair.first; - Value* inputVal = getArgumentAt(F_X86, InArgMapPair.second); - DEBUG(errs() << "\tArgument "<< i<< " = " << *inputVal << "\n"); + Value *inputVal = getArgumentAt(F_X86, InArgMapPair.second); + DEBUG(errs() << "\tArgument " << i << " = " << *inputVal << "\n"); // input value has been obtained. // Check if input is a scalar value or a pointer operand // For scalar values such as int, float, etc. 
the size is simply the size of // type on target machine, but for pointers, the size of data would be the // next integer argument - if(inputVal->getType()->isPointerTy()) { + if (inputVal->getType()->isPointerTy()) { - switchToTimer(visc_TimerID_COPY_PTR, RI); + switchToTimer(hpvm_TimerID_COPY_PTR, RI); // Pointer Input // CheckAttribute - Value* isOutput = (hasAttribute(KF, i, Attribute::Out))? True : False; - Value* isInput = ((hasAttribute(KF, i, Attribute::Out)) - && !(hasAttribute(KF, i, Attribute::In)))? False : True; - - Argument* A = getArgumentAt(KF, i); - if(isOutput == True) { + Value *isOutput = (hasAttribute(KF, i, Attribute::Out)) ? True : False; + Value *isInput = ((hasAttribute(KF, i, Attribute::Out)) && + !(hasAttribute(KF, i, Attribute::In))) + ? False + : True; + + Argument *A = getArgumentAt(KF, i); + if (isOutput == True) { DEBUG(errs() << *A << " is an OUTPUT argument\n"); } - if(isInput == True) { + if (isInput == True) { DEBUG(errs() << *A << " is an INPUT argument\n"); } - - Value* inputValI8Ptr = CastInst::CreatePointerCast(inputVal, - Type::getInt8PtrTy(M.getContext()), - inputVal->getName()+".i8ptr", - RI); + Value *inputValI8Ptr = CastInst::CreatePointerCast( + inputVal, Type::getInt8PtrTy(M.getContext()), + inputVal->getName() + ".i8ptr", RI); // Assert that the pointer argument size (next argument) is in the map - assert(kernelInArgMap.find(i+1) != kernelInArgMap.end()); - - Value* inputSize = getArgumentAt(F_X86, kernelInArgMap[i+1]); - assert(inputSize->getType() == Type::getInt64Ty(M.getContext()) - && "Pointer type input must always be followed by size (integer type)"); - Value* setInputArgs[] = {GraphID, - inputValI8Ptr, - ConstantInt::get(Type::getInt32Ty(M.getContext()),i), - inputSize, - isInput, - isOutput - }; - Value* d_ptr = CallInst::Create(llvm_visc_ocl_argument_ptr, - ArrayRef<Value*>(setInputArgs, 6), "", RI); + assert(kernelInArgMap.find(i + 1) != kernelInArgMap.end()); + + Value *inputSize = getArgumentAt(F_X86, 
kernelInArgMap[i + 1]); + assert( + inputSize->getType() == Type::getInt64Ty(M.getContext()) && + "Pointer type input must always be followed by size (integer type)"); + Value *setInputArgs[] = { + GraphID, + inputValI8Ptr, + ConstantInt::get(Type::getInt32Ty(M.getContext()), i), + inputSize, + isInput, + isOutput}; + Value *d_ptr = + CallInst::Create(llvm_hpvm_ocl_argument_ptr, + ArrayRef<Value *>(setInputArgs, 6), "", RI); DevicePointers.push_back(d_ptr); // If this has out attribute, store the returned device pointer in // memory to read device memory later - if(isOutput == True) OutputPointers.push_back(OutputPtr(inputValI8Ptr, d_ptr, inputSize)); - } - else { - switchToTimer(visc_TimerID_COPY_SCALAR, RI); + if (isOutput == True) + OutputPointers.push_back(OutputPtr(inputValI8Ptr, d_ptr, inputSize)); + } else { + switchToTimer(hpvm_TimerID_COPY_SCALAR, RI); // Scalar Input // Store the scalar value on stack and then pass the pointer to its // location - AllocaInst* inputValPtr = new AllocaInst(inputVal->getType(), 0, inputVal->getName()+".ptr", RI); - StoreInst* SI = new StoreInst(inputVal, inputValPtr, RI); - - Value* inputValI8Ptr = CastInst::CreatePointerCast(inputValPtr, - Type::getInt8PtrTy(M.getContext()), - inputVal->getName()+".i8ptr", - RI); - - Value* setInputArgs[] = {GraphID, - inputValI8Ptr, - ConstantInt::get(Type::getInt32Ty(M.getContext()),i), - ConstantExpr::getSizeOf(inputVal->getType()) - }; - CallInst::Create(llvm_visc_ocl_argument_scalar, - ArrayRef<Value*>(setInputArgs, 4), "", RI); + AllocaInst *inputValPtr = new AllocaInst( + inputVal->getType(), 0, inputVal->getName() + ".ptr", RI); + StoreInst *SI = new StoreInst(inputVal, inputValPtr, RI); + + Value *inputValI8Ptr = CastInst::CreatePointerCast( + inputValPtr, Type::getInt8PtrTy(M.getContext()), + inputVal->getName() + ".i8ptr", RI); + + Value *setInputArgs[] = { + GraphID, inputValI8Ptr, + ConstantInt::get(Type::getInt32Ty(M.getContext()), i), + 
ConstantExpr::getSizeOf(inputVal->getType())}; + CallInst::Create(llvm_hpvm_ocl_argument_scalar, + ArrayRef<Value *>(setInputArgs, 4), "", RI); } } - DEBUG(errs() << "Setup shared memory arguments of node and insert visc api\n"); + DEBUG( + errs() << "Setup shared memory arguments of node and insert hpvm api\n"); // Check to see if all the allocation sizes are constant (determined // statically) bool constSizes = true; - for (auto& e: K->getSharedInArgMap()) { + for (auto &e : K->getSharedInArgMap()) { constSizes &= isa<Constant>(e.second.first); } // If the sizes are all constant if (constSizes) { - for (auto& e: K->getSharedInArgMap()) { + for (auto &e : K->getSharedInArgMap()) { unsigned argNum = e.first; - Value* allocSize = e.second.first; + Value *allocSize = e.second.first; - DEBUG(errs() << "\tLocal Memory at "<< argNum << ", size = " << *allocSize << "\n"); + DEBUG(errs() << "\tLocal Memory at " << argNum + << ", size = " << *allocSize << "\n"); if (KF->getFunctionType()->getParamType(argNum)->isPointerTy()) { // Shared memory ptr argument - scalar at size position - switchToTimer(visc_TimerID_COPY_SCALAR, RI); + switchToTimer(hpvm_TimerID_COPY_SCALAR, RI); - assert(isa<Constant>(allocSize) && "Constant shared memory size is expected"); + assert(isa<Constant>(allocSize) && + "Constant shared memory size is expected"); - Value* setInputArgs[] = {GraphID, - ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum), - allocSize - }; - CallInst::Create(llvm_visc_ocl_argument_shared, - ArrayRef<Value*>(setInputArgs, 3), "", RI); - } - else { + Value *setInputArgs[] = { + GraphID, ConstantInt::get(Type::getInt32Ty(M.getContext()), argNum), + allocSize}; + CallInst::Create(llvm_hpvm_ocl_argument_shared, + ArrayRef<Value *>(setInputArgs, 3), "", RI); + } else { // Sharem memory size argument - scalar at address position - switchToTimer(visc_TimerID_COPY_SCALAR, RI); + switchToTimer(hpvm_TimerID_COPY_SCALAR, RI); // Store the scalar value on stack and then pass the 
pointer to its // location - AllocaInst* allocSizePtr = new AllocaInst(allocSize->getType(), 0, - allocSize->getName()+".sharedMem.ptr", RI); - StoreInst* SI = new StoreInst(allocSize, allocSizePtr, RI); - - Value* allocSizeI8Ptr = CastInst::CreatePointerCast(allocSizePtr, - Type::getInt8PtrTy(M.getContext()), - allocSize->getName()+".sharedMem.i8ptr", - RI); - - Value* setInputArgs[] = {GraphID, - allocSizeI8Ptr, - ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum), - ConstantExpr::getSizeOf(allocSize->getType()) - }; - CallInst::Create(llvm_visc_ocl_argument_scalar, - ArrayRef<Value*>(setInputArgs, 4), "", RI); + AllocaInst *allocSizePtr = + new AllocaInst(allocSize->getType(), 0, + allocSize->getName() + ".sharedMem.ptr", RI); + StoreInst *SI = new StoreInst(allocSize, allocSizePtr, RI); + + Value *allocSizeI8Ptr = CastInst::CreatePointerCast( + allocSizePtr, Type::getInt8PtrTy(M.getContext()), + allocSize->getName() + ".sharedMem.i8ptr", RI); + + Value *setInputArgs[] = { + GraphID, allocSizeI8Ptr, + ConstantInt::get(Type::getInt32Ty(M.getContext()), argNum), + ConstantExpr::getSizeOf(allocSize->getType())}; + CallInst::Create(llvm_hpvm_ocl_argument_scalar, + ArrayRef<Value *>(setInputArgs, 4), "", RI); } } } else { @@ -617,68 +612,64 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi ExtractValueInstVec.push_back(EI); } - for (auto& e: K->getSharedInArgMap()) { + for (auto &e : K->getSharedInArgMap()) { unsigned argNum = e.first; - Value* allocSize = ExtractValueInstVec[e.second.second/2]; + Value *allocSize = ExtractValueInstVec[e.second.second / 2]; - DEBUG(errs() << "\tLocal Memory at "<< argNum << ", size = " << *allocSize << "\n"); + DEBUG(errs() << "\tLocal Memory at " << argNum + << ", size = " << *allocSize << "\n"); if (KF->getFunctionType()->getParamType(argNum)->isPointerTy()) { // Shared memory ptr argument - scalar at size position - switchToTimer(visc_TimerID_COPY_SCALAR, RI); - - Value* setInputArgs[] = 
{GraphID, - ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum), - allocSize - }; - CallInst::Create(llvm_visc_ocl_argument_shared, - ArrayRef<Value*>(setInputArgs, 3), "", RI); - } - else { + switchToTimer(hpvm_TimerID_COPY_SCALAR, RI); + + Value *setInputArgs[] = { + GraphID, ConstantInt::get(Type::getInt32Ty(M.getContext()), argNum), + allocSize}; + CallInst::Create(llvm_hpvm_ocl_argument_shared, + ArrayRef<Value *>(setInputArgs, 3), "", RI); + } else { // Sharem memory size argument - scalar at address position - switchToTimer(visc_TimerID_COPY_SCALAR, RI); + switchToTimer(hpvm_TimerID_COPY_SCALAR, RI); // Store the scalar value on stack and then pass the pointer to its // location - AllocaInst* allocSizePtr = new AllocaInst(allocSize->getType(), 0, - allocSize->getName()+".sharedMem.ptr", RI); - StoreInst* SI = new StoreInst(allocSize, allocSizePtr, RI); - - Value* allocSizeI8Ptr = CastInst::CreatePointerCast(allocSizePtr, - Type::getInt8PtrTy(M.getContext()), - allocSize->getName()+".sharedMem.i8ptr", - RI); - - Value* setInputArgs[] = {GraphID, - allocSizeI8Ptr, - ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum), - ConstantExpr::getSizeOf(allocSize->getType()) - }; - CallInst::Create(llvm_visc_ocl_argument_scalar, - ArrayRef<Value*>(setInputArgs, 4), "", RI); + AllocaInst *allocSizePtr = + new AllocaInst(allocSize->getType(), 0, + allocSize->getName() + ".sharedMem.ptr", RI); + StoreInst *SI = new StoreInst(allocSize, allocSizePtr, RI); + + Value *allocSizeI8Ptr = CastInst::CreatePointerCast( + allocSizePtr, Type::getInt8PtrTy(M.getContext()), + allocSize->getName() + ".sharedMem.i8ptr", RI); + + Value *setInputArgs[] = { + GraphID, allocSizeI8Ptr, + ConstantInt::get(Type::getInt32Ty(M.getContext()), argNum), + ConstantExpr::getSizeOf(allocSize->getType())}; + CallInst::Create(llvm_hpvm_ocl_argument_scalar, + ArrayRef<Value *>(setInputArgs, 4), "", RI); } } } - - DEBUG(errs() << "Setup output edges of node and insert visc api\n"); + 
DEBUG(errs() << "Setup output edges of node and insert hpvm api\n"); // Set output if struct is not an empty struct - StructType* OutputTy = K->KernelLeafNode->getOutputType(); - std::vector<Value*> d_Outputs; - if(!OutputTy->isEmptyTy()) { - switchToTimer(visc_TimerID_COPY_PTR, RI); + StructType *OutputTy = K->KernelLeafNode->getOutputType(); + std::vector<Value *> d_Outputs; + if (!OutputTy->isEmptyTy()) { + switchToTimer(hpvm_TimerID_COPY_PTR, RI); // Not an empty struct // Iterate over all elements of the struct and put them in - for(unsigned i=0; i < OutputTy->getNumElements(); i++) { - unsigned outputIndex = KF->getFunctionType()->getNumParams()+i; - Value* setOutputArgs[] = {GraphID, - ConstantInt::get(Type::getInt32Ty(M.getContext()),outputIndex), - ConstantExpr::getSizeOf(OutputTy->getElementType(i)) - }; - - CallInst* d_Output = CallInst::Create(llvm_visc_ocl_output_ptr, - ArrayRef<Value*>(setOutputArgs, 3), - "d_output."+KF->getName(), - RI); + for (unsigned i = 0; i < OutputTy->getNumElements(); i++) { + unsigned outputIndex = KF->getFunctionType()->getNumParams() + i; + Value *setOutputArgs[] = { + GraphID, + ConstantInt::get(Type::getInt32Ty(M.getContext()), outputIndex), + ConstantExpr::getSizeOf(OutputTy->getElementType(i))}; + + CallInst *d_Output = CallInst::Create(llvm_hpvm_ocl_output_ptr, + ArrayRef<Value *>(setOutputArgs, 3), + "d_output." + KF->getName(), RI); d_Outputs.push_back(d_Output); } } @@ -688,50 +679,41 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi // Allocate size_t[numDims] space on stack. 
Store the work group sizes and // pass it as an argument to ExecNode - switchToTimer(visc_TimerID_MISC, RI); + switchToTimer(hpvm_TimerID_MISC, RI); Value *workDim, *LocalWGPtr, *GlobalWGPtr; getExecuteNodeParams(M, workDim, LocalWGPtr, GlobalWGPtr, K, VMap, RI); - switchToTimer(visc_TimerID_KERNEL, RI); - Value* ExecNodeArgs[] = {GraphID, - workDim, - LocalWGPtr, - GlobalWGPtr - }; - CallInst* Event = CallInst::Create(llvm_visc_ocl_executeNode, - ArrayRef<Value*>(ExecNodeArgs, 4), - "event."+KF->getName(), - RI); + switchToTimer(hpvm_TimerID_KERNEL, RI); + Value *ExecNodeArgs[] = {GraphID, workDim, LocalWGPtr, GlobalWGPtr}; + CallInst *Event = CallInst::Create(llvm_hpvm_ocl_executeNode, + ArrayRef<Value *>(ExecNodeArgs, 4), + "event." + KF->getName(), RI); DEBUG(errs() << "Execute Node Call: " << *Event << "\n"); // Wait for Kernel to Finish - CallInst::Create(llvm_visc_ocl_wait, - ArrayRef<Value*>(GraphID), - "", - RI); + CallInst::Create(llvm_hpvm_ocl_wait, ArrayRef<Value *>(GraphID), "", RI); - switchToTimer(visc_TimerID_READ_OUTPUT, RI); + switchToTimer(hpvm_TimerID_READ_OUTPUT, RI); // Read Output Struct if not empty - if(!OutputTy->isEmptyTy()) { - std::vector<Value*>h_Outputs; - Value* KernelOutput = UndefValue::get(OutputTy); - for(unsigned i=0; i < OutputTy->getNumElements(); i++) { - Value* GetOutputArgs[] = {GraphID, - Constant::getNullValue(Type::getInt8PtrTy(M.getContext())), - d_Outputs[i], - ConstantExpr::getSizeOf(OutputTy->getElementType(i)) - }; - CallInst* h_Output = CallInst::Create(llvm_visc_ocl_getOutput, - ArrayRef<Value*>(GetOutputArgs, 4), - "h_output."+KF->getName()+".addr", - RI); + if (!OutputTy->isEmptyTy()) { + std::vector<Value *> h_Outputs; + Value *KernelOutput = UndefValue::get(OutputTy); + for (unsigned i = 0; i < OutputTy->getNumElements(); i++) { + Value *GetOutputArgs[] = { + GraphID, Constant::getNullValue(Type::getInt8PtrTy(M.getContext())), + d_Outputs[i], ConstantExpr::getSizeOf(OutputTy->getElementType(i))}; + CallInst 
*h_Output = CallInst::Create( + llvm_hpvm_ocl_getOutput, ArrayRef<Value *>(GetOutputArgs, 4), + "h_output." + KF->getName() + ".addr", RI); // Read each device pointer listed in output struct // Load the output struct - CastInst* BI = BitCastInst::CreatePointerCast(h_Output, - OutputTy->getElementType(i)->getPointerTo(), "output.ptr", RI); - - Value* OutputElement = new LoadInst(BI, "output."+KF->getName(), RI); - KernelOutput = InsertValueInst::Create(KernelOutput, OutputElement, ArrayRef<unsigned>(i), - KF->getName()+"output", RI); + CastInst *BI = BitCastInst::CreatePointerCast( + h_Output, OutputTy->getElementType(i)->getPointerTo(), "output.ptr", + RI); + + Value *OutputElement = new LoadInst(BI, "output." + KF->getName(), RI); + KernelOutput = InsertValueInst::Create(KernelOutput, OutputElement, + ArrayRef<unsigned>(i), + KF->getName() + "output", RI); } OutputMap[K->KernelLeafNode] = KernelOutput; } @@ -746,75 +728,76 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi DEBUG(errs() << "\tTo: " << *output.h_ptr << "\n"); DEBUG(errs() << "\t#bytes: " << *output.bytes << "\n"); - Value* GetOutputArgs[] = {GraphID, output.h_ptr, output.d_ptr, output.bytes}; - CallInst* CI = CallInst::Create(llvm_visc_ocl_getOutput, + Value* GetOutputArgs[] = {GraphID, output.h_ptr, output.d_ptr, + output.bytes}; CallInst* CI = CallInst::Create(llvm_hpvm_ocl_getOutput, ArrayRef<Value*>(GetOutputArgs, 4), "", RI); }*/ - switchToTimer(visc_TimerID_MEM_FREE, RI); + switchToTimer(hpvm_TimerID_MEM_FREE, RI); // Clear Context and free device memory - DEBUG(errs() << "Clearing context" << "\n"); + DEBUG(errs() << "Clearing context" + << "\n"); // Free Device Memory - for(auto d_ptr: DevicePointers) { - CallInst::Create(llvm_visc_ocl_free, ArrayRef<Value*>(d_ptr), "", RI); + for (auto d_ptr : DevicePointers) { + CallInst::Create(llvm_hpvm_ocl_free, ArrayRef<Value *>(d_ptr), "", RI); } - switchToTimer(visc_TimerID_CLEAR_CTX, CleanupCall); + 
switchToTimer(hpvm_TimerID_CLEAR_CTX, CleanupCall); // Clear Context - LoadInst* LI = new LoadInst(GraphIDAddr, "", CleanupCall); - CallInst::Create(llvm_visc_ocl_clearContext, ArrayRef<Value*>(LI), "", CleanupCall); - switchToTimer(visc_TimerID_NONE, CleanupCall); + LoadInst *LI = new LoadInst(GraphIDAddr, "", CleanupCall); + CallInst::Create(llvm_hpvm_ocl_clearContext, ArrayRef<Value *>(LI), "", + CleanupCall); + switchToTimer(hpvm_TimerID_NONE, CleanupCall); - switchToTimer(visc_TimerID_MISC, RI); + switchToTimer(hpvm_TimerID_MISC, RI); DEBUG(errs() << "*** Generating epilogue code for the function****\n"); // Generate code for output bindings // Get Exit node - DFNode* C = N->getChildGraph()->getExit(); + DFNode *C = N->getChildGraph()->getExit(); // Get OutputType of this node - StructType* OutTy = N->getOutputType(); + StructType *OutTy = N->getOutputType(); Value *retVal = UndefValue::get(F_X86->getReturnType()); // Find the kernel's output arg map, to use instead of the bindings std::vector<unsigned> outArgMap = kernel->getOutArgMap(); // Find all the input edges to exit node - for (unsigned i=0; i < OutTy->getNumElements(); i++) { + for (unsigned i = 0; i < OutTy->getNumElements(); i++) { DEBUG(errs() << "Output Edge " << i << "\n"); // Find the incoming edge at the requested input port - DFEdge* E = C->getInDFEdgeAt(i); + DFEdge *E = C->getInDFEdgeAt(i); assert(E && "No Binding for output element!"); // Find the Source DFNode associated with the incoming edge - DFNode* SrcDF = E->getSourceDF(); + DFNode *SrcDF = E->getSourceDF(); - DEBUG(errs() << "Edge source -- " << SrcDF->getFuncPointer()->getName() << "\n"); + DEBUG(errs() << "Edge source -- " << SrcDF->getFuncPointer()->getName() + << "\n"); // If Source DFNode is a dummyNode, edge is from parent. 
Get the // argument from argument list of this internal node - Value* inputVal; - if(SrcDF->isEntryNode()) { + Value *inputVal; + if (SrcDF->isEntryNode()) { inputVal = getArgumentAt(F_X86, i); - DEBUG(errs() << "Argument "<< i<< " = " << *inputVal << "\n"); - } - else { + DEBUG(errs() << "Argument " << i << " = " << *inputVal << "\n"); + } else { // edge is from a internal node // Check - code should already be generated for this source dfnode // FIXME: Since the 2-level kernel code gen has aspecific structure, we // can assume the SrcDF is same as Kernel Leaf node. // Use outArgMap to get correct mapping SrcDF = K->KernelLeafNode; - assert(OutputMap.count(SrcDF) - && "Source node call not found. Dependency violation!"); + assert(OutputMap.count(SrcDF) && + "Source node call not found. Dependency violation!"); // Find Output Value associated with the Source DFNode using OutputMap - Value* CI = OutputMap[SrcDF]; + Value *CI = OutputMap[SrcDF]; // Extract element at source position from this call instruction std::vector<unsigned> IndexList; // i is the destination of DFEdge E // Use the mapping instead of the bindings -// IndexList.push_back(E->getSourcePosition()); + // IndexList.push_back(E->getSourcePosition()); IndexList.push_back(outArgMap[i]); - DEBUG(errs() << "Going to generate ExtarctVal inst from "<< *CI <<"\n"); - ExtractValueInst* EI = ExtractValueInst::Create(CI, IndexList, - "",RI); + DEBUG(errs() << "Going to generate ExtarctVal inst from " << *CI << "\n"); + ExtractValueInst *EI = ExtractValueInst::Create(CI, IndexList, "", RI); inputVal = EI; } std::vector<unsigned> IdxList; @@ -823,31 +806,33 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi } DEBUG(errs() << "Extracted all\n"); - switchToTimer(visc_TimerID_NONE, RI); + switchToTimer(hpvm_TimerID_NONE, RI); retVal->setName("output"); - ReturnInst* newRI = ReturnInst::Create(F_X86->getContext(), retVal); + ReturnInst *newRI = ReturnInst::Create(F_X86->getContext(), 
retVal); ReplaceInstWithInst(RI, newRI); } - // Right now, only targeting the one level case. In general, device functions // can return values so we don't need to change them -void CGT_NVPTX::codeGen(DFInternalNode* N) { - errs () << "Inside internal node: " << N->getFuncPointer()->getName() << "\n"; - if(KernelLaunchNode == NULL) - errs () << "No kernel launch node\n"; +void CGT_NVPTX::codeGen(DFInternalNode *N) { + DEBUG(errs() << "Inside internal node: " << N->getFuncPointer()->getName() + << "\n"); + if (KernelLaunchNode == NULL) + DEBUG(errs() << "No kernel launch node\n"); else { - errs() << "KernelLaunchNode: " << KernelLaunchNode->getFuncPointer()->getName() << "\n"; + DEBUG(errs() << "KernelLaunchNode: " + << KernelLaunchNode->getFuncPointer()->getName() << "\n"); } if (!KernelLaunchNode) { - DEBUG(errs() << "No code generated (host code for kernel launch complete).\n"); + DEBUG(errs() + << "No code generated (host code for kernel launch complete).\n"); return; } if (N == KernelLaunchNode) { DEBUG(errs() << "Found kernel launch node. Generating host code.\n"); - //TODO + // TODO // Now the remaining nodes to be visited should be ignored KernelLaunchNode = NULL; @@ -862,7 +847,8 @@ void CGT_NVPTX::codeGen(DFInternalNode* N) { // TODO: Structure assumed: one thread node, one allocation node (at most), // TB node std::map<unsigned, unsigned> inmapFinal; - for (std::map<unsigned, unsigned>::iterator ib = inmap2.begin(), ie = inmap2.end(); + for (std::map<unsigned, unsigned>::iterator ib = inmap2.begin(), + ie = inmap2.end(); ib != ie; ++ib) { inmapFinal[ib->first] = inmap1[ib->second]; } @@ -879,8 +865,9 @@ void CGT_NVPTX::codeGen(DFInternalNode* N) { // 0 ... 
outmap2.size()-1 // The limit is the size of outmap2, because this is the number of kernel // output arguments for which the mapping matters - // For now, it reasonable to assume that all the kernel arguments are returned, - // maybe plys some others from other nodes, thus outmap2.size() <= outmap1.size() + // For now, it reasonable to assume that all the kernel arguments are + // returned, maybe plys some others from other nodes, thus outmap2.size() <= + // outmap1.size() for (unsigned i = 0; i < outmap2.size(); i++) { outmap1[i] = outmap2[outmap1[i]]; } @@ -888,15 +875,14 @@ void CGT_NVPTX::codeGen(DFInternalNode* N) { // Track the source of local dimlimits for the kernel // Dimension limit can either be a constant or an argument of parent - // function. Since Internal node would no longer exist, we need to insert the - // localWGSize with values from the parent of N. - std::vector<Value*> localWGSizeMapped; + // function. Since Internal node would no longer exist, we need to insert + // the localWGSize with values from the parent of N. + std::vector<Value *> localWGSizeMapped; for (unsigned i = 0; i < kernel->localWGSize.size(); i++) { if (isa<Constant>(kernel->localWGSize[i])) { // if constant, use as it is localWGSizeMapped.push_back(kernel->localWGSize[i]); - } - else if (Argument* Arg = dyn_cast<Argument>(kernel->localWGSize[i])) { + } else if (Argument *Arg = dyn_cast<Argument>(kernel->localWGSize[i])) { // if argument, find the argument location in N. Use InArgMap of N to // find the source location in Parent of N. Retrieve the argument from // parent to insert in the vector. 
@@ -906,46 +892,49 @@ void CGT_NVPTX::codeGen(DFInternalNode* N) { assert(N->getInArgMap().find(argNum) != N->getInArgMap().end()); unsigned parentArgNum = N->getInArgMap()[argNum]; - Argument* A = getArgumentAt(N->getParent()->getFuncPointer(), parentArgNum); + Argument *A = + getArgumentAt(N->getParent()->getFuncPointer(), parentArgNum); localWGSizeMapped.push_back(A); - } - else { - assert(false && "LocalWGsize using value which is neither argument nor constant!"); + } else { + assert( + false && + "LocalWGsize using value which is neither argument nor constant!"); } } // Update localWGSize vector of kernel kernel->setLocalWGSize(localWGSizeMapped); } - } -void CGT_NVPTX::codeGen(DFLeafNode* N) { - errs () << "Inside leaf node: " << N->getFuncPointer()->getName() << "\n"; +void CGT_NVPTX::codeGen(DFLeafNode *N) { + DEBUG(errs() << "Inside leaf node: " << N->getFuncPointer()->getName() + << "\n"); // Skip code generation if it is a dummy node - if(N->isDummyNode()) { + if (N->isDummyNode()) { DEBUG(errs() << "Skipping dummy node\n"); return; } // Skip code generation if it is an allocation node - if(N->isAllocationNode()) { + if (N->isAllocationNode()) { DEBUG(errs() << "Skipping allocation node\n"); return; } // Generate code only if it has the right hint -// if(!checkPreferredTarget(N, visc::GPU_TARGET)) { -// errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n"; -// return; -// } - if(!preferredTargetIncludes(N, visc::GPU_TARGET)) { - errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n"; + // if(!checkPreferredTarget(N, hpvm::GPU_TARGET)) { + // errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n"; + // return; + // } + if (!preferredTargetIncludes(N, hpvm::GPU_TARGET)) { + DEBUG(errs() << "Skipping node: " << N->getFuncPointer()->getName() + << "\n"); return; } // Checking which node is the kernel launch - DFNode* PNode = N->getParent(); + DFNode *PNode = N->getParent(); int pLevel = PNode->getLevel(); int 
pReplFactor = PNode->getNumOfDim(); @@ -953,42 +942,40 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { // (1) Parent is the top level node i.e., Root of DFG // OR // (2) Parent does not have multiple instances - errs() << "pLevel = " << pLevel << "\n"; - errs() << "pReplFactor = " << pReplFactor << "\n"; + DEBUG(errs() << "pLevel = " << pLevel << "\n"); + DEBUG(errs() << "pReplFactor = " << pReplFactor << "\n"); assert((pLevel > 0) && "Root not allowed to be chosen as Kernel Node."); // Only these options are supported - enum XLevelHierarchy{ONE_LEVEL, TWO_LEVEL} SelectedHierarchy; - if(pLevel == 1 || !pReplFactor) { - errs() << "*************** Kernel Gen: 1-Level Hierarchy **************\n"; + enum XLevelHierarchy { ONE_LEVEL, TWO_LEVEL } SelectedHierarchy; + if (pLevel == 1 || !pReplFactor) { + DEBUG(errs() + << "*************** Kernel Gen: 1-Level Hierarchy **************\n"); SelectedHierarchy = ONE_LEVEL; KernelLaunchNode = PNode; - kernel = new Kernel(NULL, - N, - N->getInArgMap(), - N->getSharedInArgMap(), - N->getOutArgMap(), - N->getNumOfDim(), - N->getDimLimits()); - } - else { + kernel = new Kernel(NULL, N, N->getInArgMap(), N->getSharedInArgMap(), + N->getOutArgMap(), N->getNumOfDim(), N->getDimLimits()); + } else { // Converting a 2-level DFG to opencl kernel - errs() << "*************** Kernel Gen: 2-Level Hierarchy **************\n"; - assert((pLevel >= 2) && "Selected node not nested deep enough to be Kernel Node."); + DEBUG(errs() + << "*************** Kernel Gen: 2-Level Hierarchy **************\n"); + assert((pLevel >= 2) && + "Selected node not nested deep enough to be Kernel Node."); SelectedHierarchy = TWO_LEVEL; KernelLaunchNode = PNode->getParent(); - assert((PNode->getNumOfDim() == N->getNumOfDim()) && "Dimension number must match"); + assert((PNode->getNumOfDim() == N->getNumOfDim()) && + "Dimension number must match"); // Contains the instructions generating the kernel configuration parameters - kernel = new Kernel(NULL, // kernel function - 
N, // kernel leaf node - N->getInArgMap(), // kenel argument mapping + kernel = new Kernel(NULL, // kernel function + N, // kernel leaf node + N->getInArgMap(), // kenel argument mapping N->getSharedInArgMap(), - N->getOutArgMap(), // kernel output mapping from the leaf to the interemediate node - PNode->getNumOfDim(), // gridDim - PNode->getDimLimits(),// grid size - N->getNumOfDim(), // blockDim - N->getDimLimits()); // block size - + N->getOutArgMap(), // kernel output mapping from the + // leaf to the interemediate node + PNode->getNumOfDim(), // gridDim + PNode->getDimLimits(), // grid size + N->getNumOfDim(), // blockDim + N->getDimLimits()); // block size } std::vector<Instruction *> IItoRemove; @@ -1000,58 +987,62 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { // Look up if we have visited this function before. If we have, then just // get the cloned function pointer from DFNode. Otherwise, create the cloned // function and add it to the DFNode GenFunc. -// Function *F_nvptx = N->getGenFunc(); - Function *F_nvptx = N->getGenFuncForTarget(visc::GPU_TARGET); + // Function *F_nvptx = N->getGenFunc(); + Function *F_nvptx = N->getGenFuncForTarget(hpvm::GPU_TARGET); - assert(F_nvptx == NULL && "Error: Visiting a node for which code already generated"); + assert(F_nvptx == NULL && + "Error: Visiting a node for which code already generated"); // Clone the function ValueToValueMapTy VMap; - //F_nvptx->setName(FName+"_nvptx"); + // F_nvptx->setName(FName+"_nvptx"); Twine FName = F->getName(); StringRef fStr = FName.getSingleStringRef(); - Twine newFName = Twine(fStr, "_nvptx"); + Twine newFName = Twine(fStr, "_nvptx"); F_nvptx = CloneFunction(F, VMap); F_nvptx->setName(newFName); - // errs() << "Old Function Name: " << F->getName() << "\n"; // errs() << "New Function Name: " << F_nvptx->getName() << "\n"; F_nvptx->removeFromParent(); - // Insert the cloned function into the kernels module KernelM->getFunctionList().push_back(F_nvptx); - - //TODO: Iterate over all the 
instructions of F_nvptx and identify the - //callees and clone them into this module. + // TODO: Iterate over all the instructions of F_nvptx and identify the + // callees and clone them into this module. DEBUG(errs() << *F_nvptx->getType()); DEBUG(errs() << *F_nvptx); // Transform the function to void and remove all target dependent attributes // from the function F_nvptx = transformFunctionToVoid(F_nvptx); - - //Add generated function info to DFNode -// N->setGenFunc(F_nvptx, visc::GPU_TARGET); - N->addGenFunc(F_nvptx, visc::GPU_TARGET, false); - DEBUG(errs() << "Removing all attributes from Kernel Function and adding nounwind\n"); - F_nvptx->removeAttributes(AttributeList::FunctionIndex, F_nvptx->getAttributes().getFnAttributes()); + // Add generated function info to DFNode + // N->setGenFunc(F_nvptx, hpvm::GPU_TARGET); + N->addGenFunc(F_nvptx, hpvm::GPU_TARGET, false); + + DEBUG( + errs() + << "Removing all attributes from Kernel Function and adding nounwind\n"); + F_nvptx->removeAttributes(AttributeList::FunctionIndex, + F_nvptx->getAttributes().getFnAttributes()); F_nvptx->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind); - //FIXME: For now, assume only one allocation node + // FIXME: For now, assume only one allocation node kernel->AllocationNode = NULL; - for (DFNode::const_indfedge_iterator ieb = N->indfedge_begin(), iee = N->indfedge_end(); + for (DFNode::const_indfedge_iterator ieb = N->indfedge_begin(), + iee = N->indfedge_end(); ieb != iee; ++ieb) { DFNode *SrcDFNode = (*ieb)->getSourceDF(); - DEBUG(errs() << "Found edge from node: " << " " << SrcDFNode->getFuncPointer()->getName() << "\n"); + DEBUG(errs() << "Found edge from node: " + << " " << SrcDFNode->getFuncPointer()->getName() << "\n"); DEBUG(errs() << "Current Node: " << N->getFuncPointer()->getName() << "\n"); - DEBUG(errs() << "isAllocationNode = "<< SrcDFNode->isAllocationNode() << "\n"); + DEBUG(errs() << "isAllocationNode = " << SrcDFNode->isAllocationNode() + << "\n"); if 
(!SrcDFNode->isDummyNode()) { assert(SrcDFNode->isAllocationNode()); kernel->AllocationNode = dyn_cast<DFLeafNode>(SrcDFNode); @@ -1066,18 +1057,20 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { // If no allocation node was found, SharedMemArgs is empty if (kernel->AllocationNode) { ValueToValueMapTy VMap; - Function *F_alloc = CloneFunction(kernel->AllocationNode->getFuncPointer(), VMap); - //F_alloc->removeFromParent(); + Function *F_alloc = + CloneFunction(kernel->AllocationNode->getFuncPointer(), VMap); + // F_alloc->removeFromParent(); // Insert the cloned function into the kernels module - //M.getFunctionList().push_back(F_alloc); + // M.getFunctionList().push_back(F_alloc); - std::vector<IntrinsicInst *> ViscMallocInstVec; - findIntrinsicInst(F_alloc, Intrinsic::visc_malloc, ViscMallocInstVec); + std::vector<IntrinsicInst *> HPVMMallocInstVec; + findIntrinsicInst(F_alloc, Intrinsic::hpvm_malloc, HPVMMallocInstVec); - for (unsigned i = 0; i < ViscMallocInstVec.size(); i++) { - IntrinsicInst *II = ViscMallocInstVec[i]; - assert(II->hasOneUse() && "visc_malloc result is used more than once"); - II->replaceAllUsesWith(ConstantPointerNull::get(Type::getInt8PtrTy(M.getContext()))); + for (unsigned i = 0; i < HPVMMallocInstVec.size(); i++) { + IntrinsicInst *II = HPVMMallocInstVec[i]; + assert(II->hasOneUse() && "hpvm_malloc result is used more than once"); + II->replaceAllUsesWith( + ConstantPointerNull::get(Type::getInt8PtrTy(M.getContext()))); II->eraseFromParent(); } kernel->AllocationFunction = F_alloc; @@ -1092,15 +1085,19 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { assert(RetStructTy && "Allocation node does not return a struct type"); unsigned numFields = RetStructTy->getNumElements(); */ - std::map<unsigned, std::pair<Value*, unsigned> > sharedInMap = kernel->getSharedInArgMap(); - AllocationNodeProperty* APN = - (AllocationNodeProperty*) kernel->AllocationNode->getProperty(DFNode::Allocation); - for (auto& AllocPair: APN->getAllocationList()) { + 
std::map<unsigned, std::pair<Value *, unsigned>> sharedInMap = + kernel->getSharedInArgMap(); + AllocationNodeProperty *APN = + (AllocationNodeProperty *)kernel->AllocationNode->getProperty( + DFNode::Allocation); + for (auto &AllocPair : APN->getAllocationList()) { unsigned destPos = AllocPair.first->getDestPosition(); unsigned srcPos = AllocPair.first->getSourcePosition(); SharedMemArgs.push_back(destPos); - sharedInMap[destPos] = std::pair<Value *, unsigned>(AllocPair.second, srcPos+1); - sharedInMap[destPos+1] = std::pair<Value *, unsigned>(AllocPair.second, srcPos+1); + sharedInMap[destPos] = + std::pair<Value *, unsigned>(AllocPair.second, srcPos + 1); + sharedInMap[destPos + 1] = + std::pair<Value *, unsigned>(AllocPair.second, srcPos + 1); } kernel->setSharedInArgMap(sharedInMap); } @@ -1110,12 +1107,14 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { // global address space unsigned argIndex = 0; std::vector<unsigned> GlobalMemArgs; - for(Function::arg_iterator ai = F_nvptx->arg_begin(), ae = F_nvptx->arg_end(); - ai != ae; ++ai) { - if (ai->getType()->isPointerTy()) { - // If the arguement is already chosen for shared memory arguemnt list, skip. - // Else put it in Global memory arguement list - if(std::count(SharedMemArgs.begin(), SharedMemArgs.end(), argIndex) == 0) { + for (Function::arg_iterator ai = F_nvptx->arg_begin(), + ae = F_nvptx->arg_end(); + ai != ae; ++ai) { + if (ai->getType()->isPointerTy()) { + // If the arguement is already chosen for shared memory arguemnt list, + // skip. 
Else put it in Global memory arguement list + if (std::count(SharedMemArgs.begin(), SharedMemArgs.end(), argIndex) == + 0) { GlobalMemArgs.push_back(argIndex); } } @@ -1129,20 +1128,21 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { // Optimization: Gloabl memory arguments, which are not modified and whose // loads are not dependent on node id of current node, should be moved to // constant memory, subject to size of course - std::vector<unsigned> ConstantMemArgs = globalToConstantMemoryOpt(&GlobalMemArgs, F_nvptx); + std::vector<unsigned> ConstantMemArgs = + globalToConstantMemoryOpt(&GlobalMemArgs, F_nvptx); F_nvptx = changeArgAddrspace(F_nvptx, ConstantMemArgs, GLOBAL_ADDRSPACE); F_nvptx = changeArgAddrspace(F_nvptx, SharedMemArgs, SHARED_ADDRSPACE); F_nvptx = changeArgAddrspace(F_nvptx, GlobalMemArgs, GLOBAL_ADDRSPACE); -// Function to replace call instructions to functions in the kernel + // Function to replace call instructions to functions in the kernel std::map<Function *, Function *> OrgToClonedFuncMap; std::vector<Function *> FuncToBeRemoved; - auto CloneAndReplaceCall = [&] (CallInst *CI, Function *OrgFunc) { - Function* NewFunc; + auto CloneAndReplaceCall = [&](CallInst *CI, Function *OrgFunc) { + Function *NewFunc; // Check if the called function has already been cloned before. auto It = OrgToClonedFuncMap.find(OrgFunc); - if(It == OrgToClonedFuncMap.end()) { + if (It == OrgToClonedFuncMap.end()) { ValueToValueMapTy VMap; NewFunc = CloneFunction(OrgFunc, VMap); OrgToClonedFuncMap[OrgFunc] = NewFunc; @@ -1151,43 +1151,48 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { NewFunc = (*It).second; } // Replace the calls to this function - std::vector<Value*> args; - for(unsigned i=0; i < CI->getNumArgOperands(); i++) { + std::vector<Value *> args; + for (unsigned i = 0; i < CI->getNumArgOperands(); i++) { args.push_back(CI->getArgOperand(i)); } - CallInst* Inst = CallInst::Create(NewFunc, args, - OrgFunc->getReturnType()->isVoidTy()? 
"" : CI->getName(), CI); + CallInst *Inst = CallInst::Create( + NewFunc, args, + OrgFunc->getReturnType()->isVoidTy() ? "" : CI->getName(), CI); CI->replaceAllUsesWith(Inst); IItoRemove.push_back(CI); return NewFunc; }; - // Go through all the instructions - for (inst_iterator i = inst_begin(F_nvptx), e = inst_end(F_nvptx); i != e; ++i) { + for (inst_iterator i = inst_begin(F_nvptx), e = inst_end(F_nvptx); i != e; + ++i) { Instruction *I = &(*i); - // Leaf nodes should not contain VISC graph intrinsics or launch - assert(!BuildDFG::isViscLaunchIntrinsic(I) && "Launch intrinsic within a dataflow graph!"); - assert(!BuildDFG::isViscGraphIntrinsic(I) && "VISC graph intrinsic within a leaf dataflow node!"); + // Leaf nodes should not contain HPVM graph intrinsics or launch + assert(!BuildDFG::isHPVMLaunchIntrinsic(I) && + "Launch intrinsic within a dataflow graph!"); + assert(!BuildDFG::isHPVMGraphIntrinsic(I) && + "HPVM graph intrinsic within a leaf dataflow node!"); - if (BuildDFG::isViscIntrinsic(I)) { - IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); - IntrinsicInst* ArgII; - DFNode* ArgDFNode; + if (BuildDFG::isHPVMIntrinsic(I)) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); + IntrinsicInst *ArgII; + DFNode *ArgDFNode; - /************************ Handle VISC Query intrinsics ************************/ + /************************ Handle HPVM Query intrinsics + * ************************/ switch (II->getIntrinsicID()) { - /**************************** llvm.visc.getNode() *****************************/ - case Intrinsic::visc_getNode: { + /**************************** llvm.hpvm.getNode() + * *****************************/ + case Intrinsic::hpvm_getNode: { DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNode\n"); // add mapping <intrinsic, this node> to the node-specific map Leaf_HandleToDFNodeMap[II] = N; IItoRemove.push_back(II); - } - break; - /************************* llvm.visc.getParentNode() **************************/ - case 
Intrinsic::visc_getParentNode: { + } break; + /************************* llvm.hpvm.getParentNode() + * **************************/ + case Intrinsic::hpvm_getParentNode: { DEBUG(errs() << F_nvptx->getName() << "\t: Handling getParentNode\n"); // get the parent node of the arg node // get argument node @@ -1200,10 +1205,10 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { Leaf_HandleToDFNodeMap[II] = ArgDFNode->getParent(); IItoRemove.push_back(II); - } - break; - /*************************** llvm.visc.getNumDims() ***************************/ - case Intrinsic::visc_getNumDims: { + } break; + /*************************** llvm.hpvm.getNumDims() + * ***************************/ + case Intrinsic::hpvm_getNumDims: { DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNumDims\n"); // get node from map // get the appropriate field @@ -1211,47 +1216,48 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { ArgDFNode = Leaf_HandleToDFNodeMap[ArgII]; int numOfDim = ArgDFNode->getNumOfDim(); DEBUG(errs() << "\t Got node dimension : " << numOfDim << "\n"); - IntegerType* IntTy = Type::getInt32Ty(KernelM->getContext()); - ConstantInt* numOfDimConstant = ConstantInt::getSigned(IntTy, (int64_t) numOfDim); + IntegerType *IntTy = Type::getInt32Ty(KernelM->getContext()); + ConstantInt *numOfDimConstant = + ConstantInt::getSigned(IntTy, (int64_t)numOfDim); // Replace the result of the intrinsic with the computed value II->replaceAllUsesWith(numOfDimConstant); IItoRemove.push_back(II); - } - break; - /*********************** llvm.visc.getNodeInstanceID() ************************/ - case Intrinsic::visc_getNodeInstanceID_x: - case Intrinsic::visc_getNodeInstanceID_y: - case Intrinsic::visc_getNodeInstanceID_z: { - DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNodeInstanceID\n" << "\t: " << *II << "\n"); + } break; + /*********************** llvm.hpvm.getNodeInstanceID() + * ************************/ + case Intrinsic::hpvm_getNodeInstanceID_x: + case Intrinsic::hpvm_getNodeInstanceID_y: + 
case Intrinsic::hpvm_getNodeInstanceID_z: { + DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNodeInstanceID\n" + << "\t: " << *II << "\n"); ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts()); ArgDFNode = Leaf_HandleToDFNodeMap[ArgII]; assert(ArgDFNode && "Arg node is NULL"); // A leaf node always has a parent - DFNode* ParentDFNode = ArgDFNode->getParent(); + DFNode *ParentDFNode = ArgDFNode->getParent(); assert(ParentDFNode && "Parent node of a leaf is NULL"); // Get the number associated with the required dimension // FIXME: The order is important! // These three intrinsics need to be consecutive x,y,z - uint64_t dim = II->getIntrinsicID() - - Intrinsic::visc_getNodeInstanceID_x; + uint64_t dim = + II->getIntrinsicID() - Intrinsic::hpvm_getNodeInstanceID_x; assert((dim < 3) && "Invalid dimension argument"); DEBUG(errs() << "\t dimension = " << dim << "\n"); // Argument of the function to be called - ConstantInt * DimConstant = - ConstantInt::get(Type::getInt32Ty(KernelM->getContext()), dim); - //ArrayRef<Value *> Args(DimConstant); + ConstantInt *DimConstant = + ConstantInt::get(Type::getInt32Ty(KernelM->getContext()), dim); + // ArrayRef<Value *> Args(DimConstant); // The following is to find which function to call - Function * OpenCLFunction; + Function *OpenCLFunction; - FunctionType* FT = - FunctionType::get(Type::getInt64Ty(KernelM->getContext()), - Type::getInt32Ty(KernelM->getContext()), - false); + FunctionType *FT = + FunctionType::get(Type::getInt64Ty(KernelM->getContext()), + Type::getInt32Ty(KernelM->getContext()), false); if (SelectedHierarchy == ONE_LEVEL && ArgDFNode == N) { // We only have one level in the hierarchy or the parent node is not // replicated. 
This indicates that the parent node is the kernel @@ -1260,838 +1266,860 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { // itself DEBUG(errs() << "Substitute with get_global_id()\n"); DEBUG(errs() << *II << "\n"); - OpenCLFunction = cast<Function> - ((KernelM->getOrInsertFunction(StringRef("get_global_id"), FT)).getCallee()); + OpenCLFunction = cast<Function>( + (KernelM->getOrInsertFunction(StringRef("get_global_id"), FT)) + .getCallee()); } else if (Leaf_HandleToDFNodeMap[ArgII] == N) { - //DEBUG(errs() << "Here inside cond 2\n"); + // DEBUG(errs() << "Here inside cond 2\n"); // We are asking for this node's id with respect to its parent // this is a local id call - OpenCLFunction = cast<Function> - ((KernelM->getOrInsertFunction(StringRef("get_local_id"), FT)).getCallee()); - //DEBUG(errs() << "exiting condition 2\n"); + OpenCLFunction = cast<Function>( + (KernelM->getOrInsertFunction(StringRef("get_local_id"), FT)) + .getCallee()); + // DEBUG(errs() << "exiting condition 2\n"); } else if (Leaf_HandleToDFNodeMap[ArgII] == N->getParent()) { // We are asking for this node's parent's id with respect to its // parent: this is a group id call - OpenCLFunction = cast<Function> - ((KernelM->getOrInsertFunction(StringRef("get_group_id"), FT)).getCallee()); + OpenCLFunction = cast<Function>( + (KernelM->getOrInsertFunction(StringRef("get_group_id"), FT)) + .getCallee()); } else { - errs() << N->getFuncPointer()->getName() << "\n"; - errs() << N->getParent()->getFuncPointer()->getName() << "\n"; - errs() << *II << "\n"; + DEBUG(errs() << N->getFuncPointer()->getName() << "\n"); + DEBUG(errs() << N->getParent()->getFuncPointer()->getName() << "\n"); + DEBUG(errs() << *II << "\n"); assert(false && "Unable to translate getNodeInstanceID intrinsic"); } - //DEBUG(errs() << "Create call instruction, insert it before the instrinsic\n"); - //DEBUG(errs() << "Function: " << *OpenCLFunction << "\n"); - //DEBUG(errs() << "Arguments size: " << Args.size() << "\n"); - //DEBUG(errs() << 
"Argument: " << Args[0] << "\n"); - //DEBUG(errs() << "Arguments: " << *DimConstant << "\n"); + // DEBUG(errs() << "Create call instruction, insert it before the + // instrinsic\n"); DEBUG(errs() << "Function: " << *OpenCLFunction << + // "\n"); DEBUG(errs() << "Arguments size: " << Args.size() << "\n"); + // DEBUG(errs() << "Argument: " << Args[0] << "\n"); + // DEBUG(errs() << "Arguments: " << *DimConstant << "\n"); // Create call instruction, insert it before the intrinsic and // replace the uses of the previous instruction with the new one - CallInst* CI = CallInst::Create(OpenCLFunction, DimConstant, "", II); - //DEBUG(errs() << "Replace uses\n"); + CallInst *CI = CallInst::Create(OpenCLFunction, DimConstant, "", II); + // DEBUG(errs() << "Replace uses\n"); II->replaceAllUsesWith(CI); IItoRemove.push_back(II); - } - break; - /********************** llvm.visc.getNumNodeInstances() ***********************/ - case Intrinsic::visc_getNumNodeInstances_x: - case Intrinsic::visc_getNumNodeInstances_y: - case Intrinsic::visc_getNumNodeInstances_z: { + } break; + /********************** llvm.hpvm.getNumNodeInstances() + * ***********************/ + case Intrinsic::hpvm_getNumNodeInstances_x: + case Intrinsic::hpvm_getNumNodeInstances_y: + case Intrinsic::hpvm_getNumNodeInstances_z: { // TODO: think about whether this is the best way to go there are hw // specific registers. therefore it is good to have the intrinsic but // then, why do we need to keep that info in the graph? 
(only for the // kernel configuration during the call) - DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNumNodeInstances\n"); + DEBUG(errs() << F_nvptx->getName() + << "\t: Handling getNumNodeInstances\n"); ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts()); ArgDFNode = Leaf_HandleToDFNodeMap[ArgII]; // A leaf node always has a parent - DFNode* ParentDFNode = ArgDFNode->getParent(); + DFNode *ParentDFNode = ArgDFNode->getParent(); assert(ParentDFNode && "Parent node of a leaf is NULL"); // Get the number associated with the required dimension // FIXME: The order is important! // These three intrinsics need to be consecutive x,y,z - uint64_t dim = II->getIntrinsicID() - - Intrinsic::visc_getNumNodeInstances_x; + uint64_t dim = + II->getIntrinsicID() - Intrinsic::hpvm_getNumNodeInstances_x; assert((dim < 3) && "Invalid dimension argument"); DEBUG(errs() << "\t dimension = " << dim << "\n"); // Argument of the function to be called - ConstantInt * DimConstant = - ConstantInt::get(Type::getInt32Ty(KernelM->getContext()), dim); - //ArrayRef<Value *> Args(DimConstant); + ConstantInt *DimConstant = + ConstantInt::get(Type::getInt32Ty(KernelM->getContext()), dim); + // ArrayRef<Value *> Args(DimConstant); // The following is to find which function to call - Function * OpenCLFunction; - FunctionType* FT = + Function *OpenCLFunction; + FunctionType *FT = FunctionType::get(Type::getInt64Ty(KernelM->getContext()), - Type::getInt32Ty(KernelM->getContext()), - false); + Type::getInt32Ty(KernelM->getContext()), false); if (N == ArgDFNode && SelectedHierarchy == ONE_LEVEL) { // We only have one level in the hierarchy or the parent node is not // replicated. 
This indicates that the parent node is the kernel // launch, so the instances are global_size (gridDim x blockDim) - OpenCLFunction = cast<Function> - ((KernelM->getOrInsertFunction(StringRef("get_global_size"), FT)).getCallee()); + OpenCLFunction = cast<Function>( + (KernelM->getOrInsertFunction(StringRef("get_global_size"), FT)) + .getCallee()); } else if (Leaf_HandleToDFNodeMap[ArgII] == N) { // We are asking for this node's instances // this is a local size (block dim) call - OpenCLFunction = cast<Function> - ((KernelM->getOrInsertFunction(StringRef("get_local_size"), FT)).getCallee()); + OpenCLFunction = cast<Function>( + (KernelM->getOrInsertFunction(StringRef("get_local_size"), FT)) + .getCallee()); } else if (Leaf_HandleToDFNodeMap[ArgII] == N->getParent()) { // We are asking for this node's parent's instances // this is a (global_size/local_size) (grid dim) call - OpenCLFunction = cast<Function> - ((KernelM->getOrInsertFunction(StringRef("get_num_groups"), FT)).getCallee()); + OpenCLFunction = cast<Function>( + (KernelM->getOrInsertFunction(StringRef("get_num_groups"), FT)) + .getCallee()); } else { assert(false && "Unable to translate getNumNodeInstances intrinsic"); } // Create call instruction, insert it before the intrinsic and // replace the uses of the previous instruction with the new one - CallInst* CI = CallInst::Create(OpenCLFunction, DimConstant, "", II); + CallInst *CI = CallInst::Create(OpenCLFunction, DimConstant, "", II); II->replaceAllUsesWith(CI); IItoRemove.push_back(II); - } - break; - case Intrinsic::visc_barrier: - { + } break; + case Intrinsic::hpvm_barrier: { DEBUG(errs() << F_nvptx->getName() << "\t: Handling barrier\n"); DEBUG(errs() << "Substitute with barrier()\n"); DEBUG(errs() << *II << "\n"); - FunctionType* FT = FunctionType::get(Type::getVoidTy(KernelM->getContext()), - std::vector<Type*>(1, Type::getInt32Ty(KernelM->getContext())), - false); - Function* OpenCLFunction = cast<Function> - 
((KernelM->getOrInsertFunction(StringRef("barrier"), FT)).getCallee()); - CallInst* CI = CallInst::Create(OpenCLFunction, - ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(KernelM->getContext()), 1)), - "", II); + FunctionType *FT = FunctionType::get( + Type::getVoidTy(KernelM->getContext()), + std::vector<Type *>(1, Type::getInt32Ty(KernelM->getContext())), + false); + Function *OpenCLFunction = cast<Function>( + (KernelM->getOrInsertFunction(StringRef("barrier"), FT)) + .getCallee()); + CallInst *CI = + CallInst::Create(OpenCLFunction, + ArrayRef<Value *>(ConstantInt::get( + Type::getInt32Ty(KernelM->getContext()), 1)), + "", II); II->replaceAllUsesWith(CI); IItoRemove.push_back(II); - } - break; - case Intrinsic::visc_atomic_add: - case Intrinsic::visc_atomic_sub: - case Intrinsic::visc_atomic_xchg: - case Intrinsic::visc_atomic_min: - case Intrinsic::visc_atomic_max: - case Intrinsic::visc_atomic_and: - case Intrinsic::visc_atomic_or: - case Intrinsic::visc_atomic_xor: - { + } break; + case Intrinsic::hpvm_atomic_add: + case Intrinsic::hpvm_atomic_sub: + case Intrinsic::hpvm_atomic_xchg: + case Intrinsic::hpvm_atomic_min: + case Intrinsic::hpvm_atomic_max: + case Intrinsic::hpvm_atomic_and: + case Intrinsic::hpvm_atomic_or: + case Intrinsic::hpvm_atomic_xor: { DEBUG(errs() << *II << "\n"); // Only have support for i32 atomic intrinsics - assert(II->getType() == Type::getInt32Ty(II->getContext()) - && "Only support i32 atomic intrinsics for now"); + assert(II->getType() == Type::getInt32Ty(II->getContext()) && + "Only support i32 atomic intrinsics for now"); // Substitute with atomicrmw instruction - assert(II->getNumArgOperands() == 2 && "Expecting 2 operands for these atomics"); - Value* Ptr = II->getArgOperand(0); - Value* Val = II->getArgOperand(1); - assert(Ptr->getType()->isPointerTy() - && "First argument of supported atomics is expected to be a pointer"); - PointerType* PtrTy = cast<PointerType>(Ptr->getType()); - PointerType* TargetTy = 
Type::getInt32PtrTy(II->getContext(), PtrTy->getAddressSpace()); + assert(II->getNumArgOperands() == 2 && + "Expecting 2 operands for these atomics"); + Value *Ptr = II->getArgOperand(0); + Value *Val = II->getArgOperand(1); + assert( + Ptr->getType()->isPointerTy() && + "First argument of supported atomics is expected to be a pointer"); + PointerType *PtrTy = cast<PointerType>(Ptr->getType()); + PointerType *TargetTy = + Type::getInt32PtrTy(II->getContext(), PtrTy->getAddressSpace()); if (PtrTy != TargetTy) { Ptr = CastInst::CreatePointerCast(Ptr, TargetTy, "", II); PtrTy = TargetTy; } - std::string name; - if(II->getIntrinsicID() == Intrinsic::visc_atomic_add) - name = "atomic_add"; - else if(II->getIntrinsicID() == Intrinsic::visc_atomic_sub) - name = "atomic_sub"; - else if(II->getIntrinsicID() == Intrinsic::visc_atomic_xchg) - name = "atomic_xchg"; - else if(II->getIntrinsicID() == Intrinsic::visc_atomic_min) - name = "atomic_min"; - else if(II->getIntrinsicID() == Intrinsic::visc_atomic_max) - name = "atomic_max"; - else if(II->getIntrinsicID() == Intrinsic::visc_atomic_and) - name = "atomic_and"; - else if(II->getIntrinsicID() == Intrinsic::visc_atomic_or) - name = "atomic_or"; - else if(II->getIntrinsicID() == Intrinsic::visc_atomic_xor) - name = "atomic_xor"; - Type* paramTypes[] = {PtrTy, Val->getType()}; - FunctionType * AtomFuncT = FunctionType::get(II->getType(), ArrayRef<Type*>(paramTypes,2), false); - FunctionCallee AtomFunc = KernelM->getOrInsertFunction(name, AtomFuncT); - - Value* Params[] = {Ptr, Val}; - CallInst* AtomCI = CallInst::Create(AtomFunc, ArrayRef<Value*>(Params,2), II->getName(), II); - DEBUG(errs() << "Substitute with: " << *AtomCI << "\n"); - II->replaceAllUsesWith(AtomCI); - IItoRemove.push_back(II); - } - break; - default: - llvm_unreachable("Unknown VISC Intrinsic!"); - break; - } - - } - else if(MemCpyInst *MemCpyI = dyn_cast<MemCpyInst>(I)) { - IRBuilder<> Builder(I); - Value *Source = MemCpyI->getSource(); - Value *Destination 
= MemCpyI->getArgOperand(0)->stripPointerCasts(); - Value *Length = MemCpyI->getOperand(2); - DEBUG(errs() << "Found memcpy instruction: " << *I << "\n"); - DEBUG(errs() << "Source: " << *Source << "\n"); - DEBUG(errs() << "Destination: " << *Destination << "\n"); - DEBUG(errs() << "Length: " << *Length << "\n"); - - size_t memcpy_length; - unsigned int memcpy_count; - if (ConstantInt* CI = dyn_cast<ConstantInt>(Length)) { - if (CI->getBitWidth() <= 64) { - memcpy_length = CI->getSExtValue(); - DEBUG(errs() << "Memcpy lenght = " << memcpy_length << "\n"); - Type *Source_Type = Source->getType()->getPointerElementType(); - DEBUG(errs() << "Source Type : " << *Source_Type << "\n"); - memcpy_count = memcpy_length / (Source_Type->getPrimitiveSizeInBits() / 8); - DEBUG(errs() << "Memcpy count = " << memcpy_count << "\n"); - if (GetElementPtrInst *sourceGEPI = dyn_cast<GetElementPtrInst>(Source)) { - if (GetElementPtrInst *destGEPI = dyn_cast<GetElementPtrInst>(Destination)) { - Value *SourcePtrOperand = sourceGEPI->getPointerOperand(); - Value *DestPtrOperand = destGEPI->getPointerOperand(); - for(int i = 0; i < memcpy_count; ++i) { - Constant *increment; - LoadInst *newLoadI; - StoreInst *newStoreI; - // First, need to increment the correct index for both source and dest - // This invluves checking to see how many indeces the GEP has - // Assume for now only 1 or 2 are the viable options. 
- - std::vector<Value*> GEPlIndex; - if (sourceGEPI->getNumIndices() == 1) { - Value *Index = sourceGEPI->getOperand(1); - increment = ConstantInt::get(Index->getType(), i, false); - Value *incAdd = Builder.CreateAdd(Index, increment); - DEBUG(errs() << "Add: " << *incAdd << "\n"); - GEPlIndex.push_back(incAdd); - Value *newGEPIl = Builder.CreateGEP(SourcePtrOperand, ArrayRef<Value*>(GEPlIndex)); - DEBUG(errs() << "Load GEP: " << *newGEPIl << "\n"); - newLoadI = Builder.CreateLoad(newGEPIl); - DEBUG(errs() << "Load: " << *newLoadI << "\n"); - } else { - llvm_unreachable("Unhandled case where source GEPI has more than 1 indices!\n"); - } - - - std::vector<Value*> GEPsIndex; - if (destGEPI->getNumIndices() == 1) { - - } else if (destGEPI->getNumIndices() == 2) { - Value *Index0 = destGEPI->getOperand(1); - GEPsIndex.push_back(Index0); - Value *Index1 = destGEPI->getOperand(2); - increment = ConstantInt::get(Index1->getType(), i, false); - Value *incAdd = Builder.CreateAdd(Index1, increment); - DEBUG(errs() << "Add: " << *incAdd << "\n"); - GEPsIndex.push_back(incAdd); - Value *newGEPIs = Builder.CreateGEP(DestPtrOperand, ArrayRef<Value*>(GEPsIndex)); - DEBUG(errs() << "Store GEP: " << *newGEPIs << "\n"); - newStoreI = Builder.CreateStore(newLoadI, newGEPIs, MemCpyI->isVolatile()); - DEBUG(errs() << "Store: " << *newStoreI << "\n"); - } else { - llvm_unreachable("Unhandled case where dest GEPI has more than 2 indices!\n"); - } - } - IItoRemove.push_back(sourceGEPI); - IItoRemove.push_back(destGEPI); - Instruction *destBitcastI = dyn_cast<Instruction>(MemCpyI->getArgOperand(0)); - Instruction *sourceBitcastI = dyn_cast<Instruction>(MemCpyI->getArgOperand(1)); - IItoRemove.push_back(destBitcastI); - IItoRemove.push_back(sourceBitcastI); - IItoRemove.push_back(MemCpyI); - } - } - - } - } else { - llvm_unreachable("MEMCPY length is not a constant, not handled!\n"); - } - // llvm_unreachable("HERE!"); - } - - else if(CallInst* CI = dyn_cast<CallInst>(I)) { - DEBUG(errs() 
<< "Found a call: " << *CI << "\n"); - Function* calleeF = cast<Function>(CI->getCalledValue()->stripPointerCasts()); - if(calleeF->isDeclaration()) { - // Add the declaration to kernel module - if (calleeF->getName() == "sqrtf") { - calleeF->setName(Twine("sqrt")); - DEBUG(errs() << "CaleeF: " << *calleeF << "\n"); - DEBUG(errs() << "CI: " << *CI << "\n"); - } else if (calleeF->getName() == "rsqrtf") { - calleeF->setName(Twine("rsqrt")); - DEBUG(errs() << "CaleeF: " << *calleeF << "\n"); - DEBUG(errs() << "CI: " << *CI << "\n"); - } - DEBUG(errs() << "Adding declaration to Kernel module: " << *calleeF << "\n"); - KernelM->getOrInsertFunction(calleeF->getName(), calleeF->getFunctionType()); - } - else { - // Check if the called function has already been cloned before. - Function *NewFunc = CloneAndReplaceCall(CI, calleeF); - // Iterate over the new function to see if it calls any other functions - // in the module. - for(inst_iterator i = inst_begin(NewFunc), e = inst_end(NewFunc); i != e; ++i) { - if(auto *Call = dyn_cast<CallInst>(&*i)) { - Function *CalledFunc = cast<Function>(Call->getCalledValue()->stripPointerCasts()); - CloneAndReplaceCall(Call, CalledFunc); - } - } - } - //TODO: how to handle address space qualifiers in load/store - } - - } - // search for pattern where float is being casted to int and loaded/stored and change it. 
- DEBUG(errs() << "finding pattern for replacement!\n"); - for (inst_iterator i = inst_begin(F_nvptx), e = inst_end(F_nvptx); i != e; ++i) { - bool cont = false; - bool keepGEPI = false; - bool keepGEPI2= false; - Instruction *I = &(*i); - GetElementPtrInst* GEPI = dyn_cast<GetElementPtrInst>(I); - - if (!GEPI) { - // did nod find pattern start, continue - continue; - } - // may have found pattern, check - DEBUG(errs() << "GEPI " << *GEPI << "\n"); - // print whatever we want for debug - Value* PtrOp = GEPI->getPointerOperand(); - Type *SrcTy = GEPI->getSourceElementType(); - unsigned GEPIaddrspace = GEPI->getAddressSpace(); - - if (SrcTy->isArrayTy()) - DEBUG(errs() << *SrcTy << " is an array type! " << *(SrcTy->getArrayElementType()) << "\n"); - else - DEBUG(errs() << *SrcTy << " is not an array type!\n"); - // check that source element type is float - if (SrcTy->isArrayTy()) { - if (!(SrcTy->getArrayElementType()->isFloatTy())) { - DEBUG(errs() << "GEPI type is array but not float!\n"); - continue; - } - } - else if (!(SrcTy->isFPOrFPVectorTy()/*isFloatTy()*/)) { - DEBUG(errs() << "GEPI type is " << *SrcTy << "\n"); - // does not fit this pattern - no float GEP instruction - continue; - } - // check that addressspace is 1 - // if (GEPIaddrspace != 1) { - // // does not fit this pattern - addrspace of pointer argument is not global - // continue; - // } - if (!(GEPI->hasOneUse())) { - // does not fit this pattern - more than one uses - //continue; - // Keep GEPI around if it has other uses - keepGEPI = true; - } - DEBUG(errs() << "Found GEPI " << *GEPI << "\n"); - - // 1st GEPI it has one use - // assert(GEPI->hasOneUse() && "GEPI has a single use"); - - // See if it is a bitcast - BitCastInst *BitCastI; - for (User * U : GEPI->users()) { - if(Instruction *ui = dyn_cast<Instruction> (U)) { - DEBUG(errs() << "--" << *ui << "\n"); - if (isa<BitCastInst>(ui)) { - BitCastI = dyn_cast<BitCastInst>(ui); - DEBUG(errs() << "---Found bitcast as only use of GEP\n"); - 
break; - } - } - DEBUG(errs() << "GEPI does not have a bitcast user, continue\n"); - cont = true; - } - // for (Value::user_iterator ui = GEPI->user_begin(), - // ue = GEPI->user_end(); ui!=ue; ++ui) { - // DEBUG(errs() << "--" << *ui << "\n"); - // if (isa<BitCastInst>(*ui)) { - // BitCastI = dyn_cast<BitCastInst>(*ui); - // DEBUG(errs() << "Found bitcast as only use of GEP\n"); - // } - // } - - if (cont/*!BitCastI*/) { - continue; // not in pattern - } - - // DEBUG(errs() << *BitCastI << "\n"); - // Otherwise, check that first operand is GEP and 2nd is i32*. 1st Operand has to be the GEP, since this is a use of the GEP. - Value *Op2 = BitCastI->getOperand(0); - DEBUG(errs() << "----" << *Op2 << "\n"); - // assert(cast<Type>(Op2) && "Invalid Operand for Bitcast\n"); - // Type *OpTy = cast<Type>(Op2); - Type *OpTy = BitCastI->getDestTy(); - DEBUG(errs() << "---- Bitcast destination type: " << *OpTy << "\n"); - // DEBUG(errs() << "---- " << *(Type::getInt32PtrTy(M.getContext(),1)) << "\n"); - if (!(OpTy == Type::getInt32PtrTy(M.getContext(), GEPIaddrspace))) { - // maybe right syntax is (Type::getInt32Ty)->getPointerTo() - continue; // not in pattern - } - - DEBUG(errs() << "----Here!\n"); - // We are in GEP, bitcast. - - // user_iterator, to find the load. 
- - if (!(BitCastI->hasOneUse())) { - // does not fit this pattern - more than one uses - continue; - } - DEBUG(errs() << "----Bitcast has one use!\n"); - // it has one use - assert(BitCastI->hasOneUse() && "BitCastI has a single use"); - LoadInst *LoadI; - for (User * U : BitCastI->users()) { - if (Instruction *ui = dyn_cast<Instruction> (U)) { - DEBUG(errs() << "-----" << *ui << "\n"); - if (isa<LoadInst>(ui)) { - LoadI = dyn_cast<LoadInst>(ui); - DEBUG(errs() << "-----Found load as only use of bitcast\n"); - break; - } - } - DEBUG(errs() << "Bitcast does not have a load user, continue!\n"); - cont = true; - } - // for (Value::user_iterator ui = BitCastI->user_begin(), - // ue = BitCastI->user_end(); ui!=ue; ++ui) { - // if (isa<LoadInst>(*ui)) { - // LoadI = dyn_cast<LoadInst>(*ui); - // errs() << "Found load as only use of bitcast\n"; - // } - // } - - if (cont) { - continue; // not in pattern - } - - DEBUG("HERE!\n"); - // check that we load from pointer we got from bitcast - assert - the unique argument must be the use we found it from - assert(LoadI->getPointerOperand() == BitCastI && "Unexpected Load Instruction Operand\n"); - - // Copy user_iterator, to find the store. 
- - if (!(LoadI->hasOneUse())) { - // does not fit this pattern - more than one uses - continue; - // TODO: generalize: one load can have more than one store users - } - - // it has one use - assert(LoadI->hasOneUse() && "LoadI has a single use"); - Value::user_iterator ui = LoadI->user_begin(); - // skipped loop, because is has a single use - StoreInst *StoreI = dyn_cast<StoreInst>(*ui); - if (!StoreI) { - continue; // not in pattern - } - - // Also check that the store uses the loaded value as the value operand - if (StoreI->getValueOperand() != LoadI) { - continue; - } - - DEBUG(errs() << "-------Found store instruction\n"); - - // Look for its bitcast, which is its pointer operand - Value *StPtrOp = StoreI->getPointerOperand(); - DEBUG(errs() << "-------" << *StPtrOp << "\n"); - BitCastInst *BitCastI2 = dyn_cast<BitCastInst>(StPtrOp); - DEBUG(errs() << "-------" << *BitCastI2 << "\n"); - if (!BitCastI2) { - continue; //not in pattern - } - - DEBUG(errs() << "-------- Found Bit Cast of store!\n" ); - // found bitcast. Look for the second GEP, its from operand. - Value *BCFromOp = BitCastI2->getOperand(0); - GetElementPtrInst *GEPI2 = dyn_cast<GetElementPtrInst>(BCFromOp); - DEBUG(errs() << "---------- " << *GEPI2 << "\n"); - if (!GEPI2) { - continue; //not in pattern - } - - if (!(GEPI2->hasOneUse())) { - // does not fit this pattern - more than one uses - //continue; - // Keep GEPI around if it has other uses - keepGEPI2 = true; - } - DEBUG(errs() << "---------- Found GEPI of Bitcast!\n"); - - Value *PtrOp2 = GEPI2->getPointerOperand(); - - // Found GEPI2. TODO: kind of confused as o what checks I need to add here, let's add them together- all the code for int-float type checks is already above. 
- - // Assume we found pattern - if (!keepGEPI) { - IItoRemove.push_back(GEPI); - DEBUG(errs() << "Pushing " << *GEPI << " for removal\n"); - } else { - DEBUG(errs() << "Keeping " << *GEPI << " since it has multiple uses!\n"); - } - IItoRemove.push_back(BitCastI); - DEBUG(errs() << "Pushing " << *BitCastI << " for removal\n"); - IItoRemove.push_back(LoadI); - DEBUG(errs() << "Pushing " << *LoadI << " for removal\n"); - IItoRemove.push_back(GEPI2); - DEBUG(errs() << "Pushing " << *GEPI2 << " for removal\n"); - IItoRemove.push_back(BitCastI2); - DEBUG(errs() << "Pushing " << *BitCastI2 << " for removal\n"); - if (!keepGEPI2) { - IItoRemove.push_back(StoreI); - DEBUG(errs() << "Pushing " << *StoreI << " for removal\n"); - } else { - - DEBUG(errs() << "Keeping " << *StoreI << " since it has multiple uses!\n"); - } - - std::vector<Value*> GEPlIndex; - if (GEPI->hasIndices()) { - for(auto ii = GEPI->idx_begin(); ii != GEPI->idx_end(); ++ii) { - Value *Index = dyn_cast<Value>(&*ii); - DEBUG(errs() << "GEP-1 Index: " << *Index << "\n"); - GEPlIndex.push_back(Index); - } - } - // ArrayRef<Value*> GEPlArrayRef(GEPlIndex); - - std::vector<Value*> GEPsIndex; - if (GEPI2->hasIndices()) { - for(auto ii = GEPI2->idx_begin(); ii != GEPI2->idx_end(); ++ii) { - Value *Index = dyn_cast<Value>(&*ii); - DEBUG(errs() << "GEP-2 Index: " << *Index << "\n"); - GEPsIndex.push_back(Index); - } - } - // ArrayRef<Value*> GEPsArrayRef(GEPlIndex); - - - - // ArrayRef<Value*>(GEPI->idx_begin(), GEPI->idx_end()); - GetElementPtrInst* newlGEP = - GetElementPtrInst::Create(GEPI->getSourceElementType(), //Type::getFloatTy(M.getContext()), - PtrOp, // operand from 1st GEP - ArrayRef<Value*>(GEPlIndex), - Twine(), - StoreI); - DEBUG(errs() << "Adding: " << *newlGEP << "\n"); - // insert load before GEPI - LoadInst *newLoadI = - new LoadInst(Type::getFloatTy(M.getContext()), - newlGEP, // new GEP - Twine(), - LoadI->isVolatile(), - LoadI->getAlignment(), - LoadI->getOrdering(), - 
LoadI->getSyncScopeID(), - StoreI); - DEBUG(errs() << "Adding: " << *newLoadI << "\n"); - // same for GEP for store, for store operand - GetElementPtrInst* newsGEP = - GetElementPtrInst::Create(GEPI2->getSourceElementType(), // Type::getFloatTy(M.getContext()), - PtrOp2, // operand from 2nd GEP - ArrayRef<Value*>(GEPsIndex), - Twine(), - StoreI); - DEBUG(errs() << "Adding: " << *newsGEP << "\n"); - // insert store before GEPI - StoreInst *newStoreI = - new StoreInst(newLoadI, - newsGEP, // new GEP - StoreI->isVolatile(), - StoreI->getAlignment(), - StoreI->getOrdering(), - StoreI->getSyncScopeID(), - StoreI); - DEBUG(errs() << "Adding: " << *newStoreI << "\n"); - - } - - // We need to do this explicitly: DCE pass will not remove them because we - // have assumed theworst memory behaviour for these function calls - // Traverse the vector backwards, otherwise definitions are deleted while - // their subsequent uses are still around - for (auto *I : reverse(IItoRemove)) { - DEBUG(errs() << "Erasing: " << *I << "\n"); - I->eraseFromParent(); - } - - // Removed the cloned functions from the parent module into the new module - for(auto *F : FuncToBeRemoved) { - F->removeFromParent(); //TODO: MARIA check - KernelM->getFunctionList().push_back(F); - } - - addCLMetadata(F_nvptx); - kernel->KernelFunction = F_nvptx; - errs() << "Identified kernel - " << kernel->KernelFunction->getName() << "\n"; - DEBUG(errs() << *KernelM); - - return; -} + std::string name; + if (II->getIntrinsicID() == Intrinsic::hpvm_atomic_add) + name = "atomic_add"; + else if (II->getIntrinsicID() == Intrinsic::hpvm_atomic_sub) + name = "atomic_sub"; + else if (II->getIntrinsicID() == Intrinsic::hpvm_atomic_xchg) + name = "atomic_xchg"; + else if (II->getIntrinsicID() == Intrinsic::hpvm_atomic_min) + name = "atomic_min"; + else if (II->getIntrinsicID() == Intrinsic::hpvm_atomic_max) + name = "atomic_max"; + else if (II->getIntrinsicID() == Intrinsic::hpvm_atomic_and) + name = "atomic_and"; + else if 
(II->getIntrinsicID() == Intrinsic::hpvm_atomic_or) + name = "atomic_or"; + else if (II->getIntrinsicID() == Intrinsic::hpvm_atomic_xor) + name = "atomic_xor"; + Type *paramTypes[] = {PtrTy, Val->getType()}; + FunctionType *AtomFuncT = FunctionType::get( + II->getType(), ArrayRef<Type *>(paramTypes, 2), false); + FunctionCallee AtomFunc = KernelM->getOrInsertFunction(name, AtomFuncT); + + Value *Params[] = {Ptr, Val}; + CallInst *AtomCI = CallInst::Create( + AtomFunc, ArrayRef<Value *>(Params, 2), II->getName(), II); + DEBUG(errs() << "Substitute with: " << *AtomCI << "\n"); + II->replaceAllUsesWith(AtomCI); + IItoRemove.push_back(II); + } break; + default: + llvm_unreachable("Unknown HPVM Intrinsic!"); + break; + } -bool DFG2LLVM_NVPTX::runOnModule(Module &M) { - errs() << "\nDFG2LLVM_NVPTX PASS\n"; + } else if (MemCpyInst *MemCpyI = dyn_cast<MemCpyInst>(I)) { + IRBuilder<> Builder(I); + Value *Source = MemCpyI->getSource(); + Value *Destination = MemCpyI->getArgOperand(0)->stripPointerCasts(); + Value *Length = MemCpyI->getOperand(2); + DEBUG(errs() << "Found memcpy instruction: " << *I << "\n"); + DEBUG(errs() << "Source: " << *Source << "\n"); + DEBUG(errs() << "Destination: " << *Destination << "\n"); + DEBUG(errs() << "Length: " << *Length << "\n"); + + size_t memcpy_length; + unsigned int memcpy_count; + if (ConstantInt *CI = dyn_cast<ConstantInt>(Length)) { + if (CI->getBitWidth() <= 64) { + memcpy_length = CI->getSExtValue(); + DEBUG(errs() << "Memcpy lenght = " << memcpy_length << "\n"); + Type *Source_Type = Source->getType()->getPointerElementType(); + DEBUG(errs() << "Source Type : " << *Source_Type << "\n"); + memcpy_count = + memcpy_length / (Source_Type->getPrimitiveSizeInBits() / 8); + DEBUG(errs() << "Memcpy count = " << memcpy_count << "\n"); + if (GetElementPtrInst *sourceGEPI = + dyn_cast<GetElementPtrInst>(Source)) { + if (GetElementPtrInst *destGEPI = + dyn_cast<GetElementPtrInst>(Destination)) { + Value *SourcePtrOperand = 
sourceGEPI->getPointerOperand(); + Value *DestPtrOperand = destGEPI->getPointerOperand(); + for (int i = 0; i < memcpy_count; ++i) { + Constant *increment; + LoadInst *newLoadI; + StoreInst *newStoreI; + // First, need to increment the correct index for both source + // and dest This invluves checking to see how many indeces the + // GEP has Assume for now only 1 or 2 are the viable options. + + std::vector<Value *> GEPlIndex; + if (sourceGEPI->getNumIndices() == 1) { + Value *Index = sourceGEPI->getOperand(1); + increment = ConstantInt::get(Index->getType(), i, false); + Value *incAdd = Builder.CreateAdd(Index, increment); + DEBUG(errs() << "Add: " << *incAdd << "\n"); + GEPlIndex.push_back(incAdd); + Value *newGEPIl = Builder.CreateGEP( + SourcePtrOperand, ArrayRef<Value *>(GEPlIndex)); + DEBUG(errs() << "Load GEP: " << *newGEPIl << "\n"); + newLoadI = Builder.CreateLoad(newGEPIl); + DEBUG(errs() << "Load: " << *newLoadI << "\n"); + } else { + llvm_unreachable("Unhandled case where source GEPI has more " + "than 1 indices!\n"); + } + + std::vector<Value *> GEPsIndex; + if (destGEPI->getNumIndices() == 1) { + + } else if (destGEPI->getNumIndices() == 2) { + Value *Index0 = destGEPI->getOperand(1); + GEPsIndex.push_back(Index0); + Value *Index1 = destGEPI->getOperand(2); + increment = ConstantInt::get(Index1->getType(), i, false); + Value *incAdd = Builder.CreateAdd(Index1, increment); + DEBUG(errs() << "Add: " << *incAdd << "\n"); + GEPsIndex.push_back(incAdd); + Value *newGEPIs = Builder.CreateGEP( + DestPtrOperand, ArrayRef<Value *>(GEPsIndex)); + DEBUG(errs() << "Store GEP: " << *newGEPIs << "\n"); + newStoreI = Builder.CreateStore(newLoadI, newGEPIs, + MemCpyI->isVolatile()); + DEBUG(errs() << "Store: " << *newStoreI << "\n"); + } else { + llvm_unreachable("Unhandled case where dest GEPI has more " + "than 2 indices!\n"); + } + } + IItoRemove.push_back(sourceGEPI); + IItoRemove.push_back(destGEPI); + Instruction *destBitcastI = + 
dyn_cast<Instruction>(MemCpyI->getArgOperand(0)); + Instruction *sourceBitcastI = + dyn_cast<Instruction>(MemCpyI->getArgOperand(1)); + IItoRemove.push_back(destBitcastI); + IItoRemove.push_back(sourceBitcastI); + IItoRemove.push_back(MemCpyI); + } + } + } + } else { + llvm_unreachable("MEMCPY length is not a constant, not handled!\n"); + } + // llvm_unreachable("HERE!"); + } - // Get the BuildDFG Analysis Results: - // - Dataflow graph - // - Maps from i8* hansles to DFNode and DFEdge - BuildDFG &DFG = getAnalysis<BuildDFG>(); + else if (CallInst *CI = dyn_cast<CallInst>(I)) { + DEBUG(errs() << "Found a call: " << *CI << "\n"); + Function *calleeF = + cast<Function>(CI->getCalledValue()->stripPointerCasts()); + if (calleeF->isDeclaration()) { + // Add the declaration to kernel module + if (calleeF->getName() == "sqrtf") { + calleeF->setName(Twine("sqrt")); + DEBUG(errs() << "CaleeF: " << *calleeF << "\n"); + DEBUG(errs() << "CI: " << *CI << "\n"); + } else if (calleeF->getName() == "rsqrtf") { + calleeF->setName(Twine("rsqrt")); + DEBUG(errs() << "CaleeF: " << *calleeF << "\n"); + DEBUG(errs() << "CI: " << *CI << "\n"); + } + DEBUG(errs() << "Adding declaration to Kernel module: " << *calleeF + << "\n"); + KernelM->getOrInsertFunction(calleeF->getName(), + calleeF->getFunctionType()); + } else { + // Check if the called function has already been cloned before. + Function *NewFunc = CloneAndReplaceCall(CI, calleeF); + // Iterate over the new function to see if it calls any other functions + // in the module. + for (inst_iterator i = inst_begin(NewFunc), e = inst_end(NewFunc); + i != e; ++i) { + if (auto *Call = dyn_cast<CallInst>(&*i)) { + Function *CalledFunc = + cast<Function>(Call->getCalledValue()->stripPointerCasts()); + CloneAndReplaceCall(Call, CalledFunc); + } + } + } + // TODO: how to handle address space qualifiers in load/store + } + } + // search for pattern where float is being casted to int and loaded/stored and + // change it. 
+ DEBUG(errs() << "finding pattern for replacement!\n"); + for (inst_iterator i = inst_begin(F_nvptx), e = inst_end(F_nvptx); i != e; + ++i) { + bool cont = false; + bool keepGEPI = false; + bool keepGEPI2 = false; + Instruction *I = &(*i); + GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I); - // DFInternalNode *Root = DFG.getRoot(); - std::vector<DFInternalNode*> Roots = DFG.getRoots(); - // BuildDFG::HandleToDFNode &HandleToDFNodeMap = DFG.getHandleToDFNodeMap(); - // BuildDFG::HandleToDFEdge &HandleToDFEdgeMap = DFG.getHandleToDFEdgeMap(); + if (!GEPI) { + // did nod find pattern start, continue + continue; + } + // may have found pattern, check + DEBUG(errs() << "GEPI " << *GEPI << "\n"); + // print whatever we want for debug + Value *PtrOp = GEPI->getPointerOperand(); + Type *SrcTy = GEPI->getSourceElementType(); + unsigned GEPIaddrspace = GEPI->getAddressSpace(); + + if (SrcTy->isArrayTy()) + DEBUG(errs() << *SrcTy << " is an array type! " + << *(SrcTy->getArrayElementType()) << "\n"); + else + DEBUG(errs() << *SrcTy << " is not an array type!\n"); + // check that source element type is float + if (SrcTy->isArrayTy()) { + if (!(SrcTy->getArrayElementType()->isFloatTy())) { + DEBUG(errs() << "GEPI type is array but not float!\n"); + continue; + } + } else if (!(SrcTy->isFPOrFPVectorTy() /*isFloatTy()*/)) { + DEBUG(errs() << "GEPI type is " << *SrcTy << "\n"); + // does not fit this pattern - no float GEP instruction + continue; + } + // check that addressspace is 1 + // if (GEPIaddrspace != 1) { + // // does not fit this pattern - addrspace of pointer argument + //is not global continue; + // } + if (!(GEPI->hasOneUse())) { + // does not fit this pattern - more than one uses + // continue; + // Keep GEPI around if it has other uses + keepGEPI = true; + } + DEBUG(errs() << "Found GEPI " << *GEPI << "\n"); + + // 1st GEPI it has one use + // assert(GEPI->hasOneUse() && "GEPI has a single use"); + + // See if it is a bitcast + BitCastInst *BitCastI; + for 
(User *U : GEPI->users()) { + if (Instruction *ui = dyn_cast<Instruction>(U)) { + DEBUG(errs() << "--" << *ui << "\n"); + if (isa<BitCastInst>(ui)) { + BitCastI = dyn_cast<BitCastInst>(ui); + DEBUG(errs() << "---Found bitcast as only use of GEP\n"); + break; + } + } + DEBUG(errs() << "GEPI does not have a bitcast user, continue\n"); + cont = true; + } + // for (Value::user_iterator ui = GEPI->user_begin(), + // ue = GEPI->user_end(); ui!=ue; ++ui) { + // DEBUG(errs() << "--" << *ui << "\n"); + // if (isa<BitCastInst>(*ui)) { + // BitCastI = dyn_cast<BitCastInst>(*ui); + // DEBUG(errs() << "Found bitcast as only use of GEP\n"); + // } + // } + + if (cont /*!BitCastI*/) { + continue; // not in pattern + } - // Visitor for Code Generation Graph Traversal - CGT_NVPTX *CGTVisitor = new CGT_NVPTX(M, DFG); + // DEBUG(errs() << *BitCastI << "\n"); + // Otherwise, check that first operand is GEP and 2nd is i32*. 1st Operand + // has to be the GEP, since this is a use of the GEP. + Value *Op2 = BitCastI->getOperand(0); + DEBUG(errs() << "----" << *Op2 << "\n"); + // assert(cast<Type>(Op2) && "Invalid Operand for Bitcast\n"); + // Type *OpTy = cast<Type>(Op2); + Type *OpTy = BitCastI->getDestTy(); + DEBUG(errs() << "---- Bitcast destination type: " << *OpTy << "\n"); + // DEBUG(errs() << "---- " << *(Type::getInt32PtrTy(M.getContext(),1)) << + // "\n"); + if (!(OpTy == Type::getInt32PtrTy(M.getContext(), GEPIaddrspace))) { + // maybe right syntax is (Type::getInt32Ty)->getPointerTo() + continue; // not in pattern + } - // Iterate over all the DFGs and produce code for each one of them - for (auto rootNode: Roots) { - // Initiate code generation for root DFNode - CGTVisitor->visit(rootNode); - } + DEBUG(errs() << "----Here!\n"); + // We are in GEP, bitcast. - CGTVisitor->writeKernelsModule(); + // user_iterator, to find the load. 
- //TODO: Edit module epilogue to remove the VISC intrinsic declarations - delete CGTVisitor; + if (!(BitCastI->hasOneUse())) { + // does not fit this pattern - more than one uses + continue; + } + DEBUG(errs() << "----Bitcast has one use!\n"); + // it has one use + assert(BitCastI->hasOneUse() && "BitCastI has a single use"); + LoadInst *LoadI; + for (User *U : BitCastI->users()) { + if (Instruction *ui = dyn_cast<Instruction>(U)) { + DEBUG(errs() << "-----" << *ui << "\n"); + if (isa<LoadInst>(ui)) { + LoadI = dyn_cast<LoadInst>(ui); + DEBUG(errs() << "-----Found load as only use of bitcast\n"); + break; + } + } + DEBUG(errs() << "Bitcast does not have a load user, continue!\n"); + cont = true; + } + // for (Value::user_iterator ui = BitCastI->user_begin(), + // ue = BitCastI->user_end(); ui!=ue; ++ui) { + // if (isa<LoadInst>(*ui)) { + // LoadI = dyn_cast<LoadInst>(*ui); + // errs() << "Found load as only use of bitcast\n"; + // } + // } + + if (cont) { + continue; // not in pattern + } + + DEBUG("HERE!\n"); + // check that we load from pointer we got from bitcast - assert - the unique + // argument must be the use we found it from + assert(LoadI->getPointerOperand() == BitCastI && + "Unexpected Load Instruction Operand\n"); + + // Copy user_iterator, to find the store. 
+ + if (!(LoadI->hasOneUse())) { + // does not fit this pattern - more than one uses + continue; + // TODO: generalize: one load can have more than one store users + } + + // it has one use + assert(LoadI->hasOneUse() && "LoadI has a single use"); + Value::user_iterator ui = LoadI->user_begin(); + // skipped loop, because is has a single use + StoreInst *StoreI = dyn_cast<StoreInst>(*ui); + if (!StoreI) { + continue; // not in pattern + } - return true; + // Also check that the store uses the loaded value as the value operand + if (StoreI->getValueOperand() != LoadI) { + continue; + } + + DEBUG(errs() << "-------Found store instruction\n"); + + // Look for its bitcast, which is its pointer operand + Value *StPtrOp = StoreI->getPointerOperand(); + DEBUG(errs() << "-------" << *StPtrOp << "\n"); + BitCastInst *BitCastI2 = dyn_cast<BitCastInst>(StPtrOp); + DEBUG(errs() << "-------" << *BitCastI2 << "\n"); + if (!BitCastI2) { + continue; // not in pattern + } + + DEBUG(errs() << "-------- Found Bit Cast of store!\n"); + // found bitcast. Look for the second GEP, its from operand. + Value *BCFromOp = BitCastI2->getOperand(0); + GetElementPtrInst *GEPI2 = dyn_cast<GetElementPtrInst>(BCFromOp); + DEBUG(errs() << "---------- " << *GEPI2 << "\n"); + if (!GEPI2) { + continue; // not in pattern + } + + if (!(GEPI2->hasOneUse())) { + // does not fit this pattern - more than one uses + // continue; + // Keep GEPI around if it has other uses + keepGEPI2 = true; + } + DEBUG(errs() << "---------- Found GEPI of Bitcast!\n"); + + Value *PtrOp2 = GEPI2->getPointerOperand(); + + // Found GEPI2. TODO: kind of confused as o what checks I need to add here, + // let's add them together- all the code for int-float type checks is + // already above. 
+ + // Assume we found pattern + if (!keepGEPI) { + IItoRemove.push_back(GEPI); + DEBUG(errs() << "Pushing " << *GEPI << " for removal\n"); + } else { + DEBUG(errs() << "Keeping " << *GEPI << " since it has multiple uses!\n"); + } + IItoRemove.push_back(BitCastI); + DEBUG(errs() << "Pushing " << *BitCastI << " for removal\n"); + IItoRemove.push_back(LoadI); + DEBUG(errs() << "Pushing " << *LoadI << " for removal\n"); + IItoRemove.push_back(GEPI2); + DEBUG(errs() << "Pushing " << *GEPI2 << " for removal\n"); + IItoRemove.push_back(BitCastI2); + DEBUG(errs() << "Pushing " << *BitCastI2 << " for removal\n"); + if (!keepGEPI2) { + IItoRemove.push_back(StoreI); + DEBUG(errs() << "Pushing " << *StoreI << " for removal\n"); + } else { + + DEBUG(errs() << "Keeping " << *StoreI + << " since it has multiple uses!\n"); + } + + std::vector<Value *> GEPlIndex; + if (GEPI->hasIndices()) { + for (auto ii = GEPI->idx_begin(); ii != GEPI->idx_end(); ++ii) { + Value *Index = dyn_cast<Value>(&*ii); + DEBUG(errs() << "GEP-1 Index: " << *Index << "\n"); + GEPlIndex.push_back(Index); + } + } + // ArrayRef<Value*> GEPlArrayRef(GEPlIndex); + + std::vector<Value *> GEPsIndex; + if (GEPI2->hasIndices()) { + for (auto ii = GEPI2->idx_begin(); ii != GEPI2->idx_end(); ++ii) { + Value *Index = dyn_cast<Value>(&*ii); + DEBUG(errs() << "GEP-2 Index: " << *Index << "\n"); + GEPsIndex.push_back(Index); + } + } + // ArrayRef<Value*> GEPsArrayRef(GEPlIndex); + + // ArrayRef<Value*>(GEPI->idx_begin(), GEPI->idx_end()); + GetElementPtrInst *newlGEP = GetElementPtrInst::Create( + GEPI->getSourceElementType(), // Type::getFloatTy(M.getContext()), + PtrOp, // operand from 1st GEP + ArrayRef<Value *>(GEPlIndex), Twine(), StoreI); + DEBUG(errs() << "Adding: " << *newlGEP << "\n"); + // insert load before GEPI + LoadInst *newLoadI = + new LoadInst(Type::getFloatTy(M.getContext()), + newlGEP, // new GEP + Twine(), LoadI->isVolatile(), LoadI->getAlignment(), + LoadI->getOrdering(), LoadI->getSyncScopeID(), 
StoreI); + DEBUG(errs() << "Adding: " << *newLoadI << "\n"); + // same for GEP for store, for store operand + GetElementPtrInst *newsGEP = GetElementPtrInst::Create( + GEPI2->getSourceElementType(), // Type::getFloatTy(M.getContext()), + PtrOp2, // operand from 2nd GEP + ArrayRef<Value *>(GEPsIndex), Twine(), StoreI); + DEBUG(errs() << "Adding: " << *newsGEP << "\n"); + // insert store before GEPI + StoreInst *newStoreI = + new StoreInst(newLoadI, + newsGEP, // new GEP + StoreI->isVolatile(), StoreI->getAlignment(), + StoreI->getOrdering(), StoreI->getSyncScopeID(), StoreI); + DEBUG(errs() << "Adding: " << *newStoreI << "\n"); + } + + // We need to do this explicitly: DCE pass will not remove them because we + // have assumed theworst memory behaviour for these function calls + // Traverse the vector backwards, otherwise definitions are deleted while + // their subsequent uses are still around + for (auto *I : reverse(IItoRemove)) { + DEBUG(errs() << "Erasing: " << *I << "\n"); + I->eraseFromParent(); + } + + // Removed the cloned functions from the parent module into the new module + for (auto *F : FuncToBeRemoved) { + F->removeFromParent(); // TODO: MARIA check + KernelM->getFunctionList().push_back(F); + } + + addCLMetadata(F_nvptx); + kernel->KernelFunction = F_nvptx; + DEBUG(errs() << "Identified kernel - " << kernel->KernelFunction->getName() + << "\n"); + DEBUG(errs() << *KernelM); + + return; } -std::string CGT_NVPTX::getKernelsModuleName(Module &M) { - /*SmallString<128> currentDir; - llvm::sys::fs::current_path(currentDir); - std::string fileName = getFilenameFromModule(M); - Twine output = Twine(currentDir) + "/Output/" + fileName + ""; - return output.str().append(".kernels.ll");*/ - std::string mid = M.getModuleIdentifier(); - return mid.append(".kernels.ll"); +bool DFG2LLVM_NVPTX::runOnModule(Module &M) { + DEBUG(errs() << "\nDFG2LLVM_NVPTX PASS\n"); + + // Get the BuildDFG Analysis Results: + // - Dataflow graph + // - Maps from i8* hansles to DFNode 
and DFEdge + BuildDFG &DFG = getAnalysis<BuildDFG>(); + + // DFInternalNode *Root = DFG.getRoot(); + std::vector<DFInternalNode *> Roots = DFG.getRoots(); + // BuildDFG::HandleToDFNode &HandleToDFNodeMap = + // DFG.getHandleToDFNodeMap(); BuildDFG::HandleToDFEdge &HandleToDFEdgeMap + // = DFG.getHandleToDFEdgeMap(); + + // Visitor for Code Generation Graph Traversal + CGT_NVPTX *CGTVisitor = new CGT_NVPTX(M, DFG); + + // Iterate over all the DFGs and produce code for each one of them + for (auto rootNode : Roots) { + // Initiate code generation for root DFNode + CGTVisitor->visit(rootNode); + } + + CGTVisitor->writeKernelsModule(); + + // TODO: Edit module epilogue to remove the HPVM intrinsic declarations + delete CGTVisitor; + + return true; } -void CGT_NVPTX::fixValueAddrspace(Value* V, unsigned addrspace) { - assert(isa<PointerType>(V->getType()) - && "Value should be of Pointer Type!"); - PointerType* OldTy = cast<PointerType>(V->getType()); - PointerType* NewTy = PointerType::get(OldTy->getElementType(), addrspace); - V->mutateType(NewTy); - for(Value::user_iterator ui = V->user_begin(), ue = V->user_end(); ui != ue; ui++) { - // Change all uses producing pointer type in same address space to new - // addressspace. 
- if(PointerType* PTy = dyn_cast<PointerType>((*ui)->getType())) { - if(PTy->getAddressSpace() == OldTy->getAddressSpace()) { - fixValueAddrspace(*ui, addrspace); - } - } - } +std::string CGT_NVPTX::getKernelsModuleName(Module &M) { + /*SmallString<128> currentDir; + llvm::sys::fs::current_path(currentDir); + std::string fileName = getFilenameFromModule(M); + Twine output = Twine(currentDir) + "/Output/" + fileName + ""; + return output.str().append(".kernels.ll");*/ + std::string mid = M.getModuleIdentifier(); + return mid.append(".kernels.ll"); } +void CGT_NVPTX::fixValueAddrspace(Value *V, unsigned addrspace) { + assert(isa<PointerType>(V->getType()) && "Value should be of Pointer Type!"); + PointerType *OldTy = cast<PointerType>(V->getType()); + PointerType *NewTy = PointerType::get(OldTy->getElementType(), addrspace); + V->mutateType(NewTy); + for (Value::user_iterator ui = V->user_begin(), ue = V->user_end(); ui != ue; + ui++) { + // Change all uses producing pointer type in same address space to new + // addressspace. 
+ if (PointerType *PTy = dyn_cast<PointerType>((*ui)->getType())) { + if (PTy->getAddressSpace() == OldTy->getAddressSpace()) { + fixValueAddrspace(*ui, addrspace); + } + } + } +} -std::vector<unsigned> CGT_NVPTX::globalToConstantMemoryOpt(std::vector<unsigned>* GlobalMemArgs, Function* F) { - std::vector<unsigned> ConstantMemArgs; - for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); - ai != ae; ++ai) { - Argument* arg = &*ai; - std::vector<unsigned>::iterator pos = std::find(GlobalMemArgs->begin(), - GlobalMemArgs->end(), arg->getArgNo()); - // It has to be a global memory argument to be promotable - if(pos == GlobalMemArgs->end()) - continue; - - // Check if it can/should be promoted - if(canBePromoted(arg, F)) { - errs() << "Promoting << " << arg->getName() << " to constant memory."<< "\n"; - ConstantMemArgs.push_back(arg->getArgNo()); - GlobalMemArgs->erase(pos); - } - } - return ConstantMemArgs; +std::vector<unsigned> +CGT_NVPTX::globalToConstantMemoryOpt(std::vector<unsigned> *GlobalMemArgs, + Function *F) { + std::vector<unsigned> ConstantMemArgs; + for (Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae; + ++ai) { + Argument *arg = &*ai; + std::vector<unsigned>::iterator pos = std::find( + GlobalMemArgs->begin(), GlobalMemArgs->end(), arg->getArgNo()); + // It has to be a global memory argument to be promotable + if (pos == GlobalMemArgs->end()) + continue; + + // Check if it can/should be promoted + if (canBePromoted(arg, F)) { + DEBUG(errs() << "Promoting << " << arg->getName() + << " to constant memory." 
+ << "\n"); + ConstantMemArgs.push_back(arg->getArgNo()); + GlobalMemArgs->erase(pos); + } + } + return ConstantMemArgs; } -Function* CGT_NVPTX::changeArgAddrspace(Function* F, std::vector<unsigned> &Args, unsigned addrspace) { - unsigned idx = 0; - std::vector<Type*> ArgTypes; - for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); - ai != ae; ++ai) { - Argument *arg = &*ai; - DEBUG(errs() << *arg << "\n"); - unsigned argno = arg->getArgNo(); - if ((idx < Args.size()) && (argno == Args[idx])) { - fixValueAddrspace(arg, addrspace); - idx++; - } - ArgTypes.push_back(arg->getType()); - } - FunctionType* newFT = FunctionType::get(F->getReturnType(), ArgTypes, false); - - //F->mutateType(PTy); - Function* newF = cloneFunction(F, newFT, false); - replaceNodeFunctionInIR(*F->getParent(), F, newF); - - DEBUG(errs() << *newF->getFunctionType() << "\n" <<*newF << "\n"); - return newF; +Function *CGT_NVPTX::changeArgAddrspace(Function *F, + std::vector<unsigned> &Args, + unsigned addrspace) { + unsigned idx = 0; + std::vector<Type *> ArgTypes; + for (Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae; + ++ai) { + Argument *arg = &*ai; + DEBUG(errs() << *arg << "\n"); + unsigned argno = arg->getArgNo(); + if ((idx < Args.size()) && (argno == Args[idx])) { + fixValueAddrspace(arg, addrspace); + idx++; + } + ArgTypes.push_back(arg->getType()); + } + FunctionType *newFT = FunctionType::get(F->getReturnType(), ArgTypes, false); + + // F->mutateType(PTy); + Function *newF = cloneFunction(F, newFT, false); + replaceNodeFunctionInIR(*F->getParent(), F, newF); + + DEBUG(errs() << *newF->getFunctionType() << "\n" << *newF << "\n"); + return newF; } /* Add metadata to module KernelM, for OpenCL kernels */ void CGT_NVPTX::addCLMetadata(Function *F) { - IRBuilder<> Builder(&*F->begin()); + IRBuilder<> Builder(&*F->begin()); + + SmallVector<Metadata *, 8> KernelMD; + KernelMD.push_back(ValueAsMetadata::get(F)); + + // TODO: There is additional metadata 
used by kernel files but we skip them as + // they are not mandatory. In future they might be useful to enable + // optimizations + + MDTuple *MDKernelNode = MDNode::get(KernelM->getContext(), KernelMD); + NamedMDNode *MDN_kernels = + KernelM->getOrInsertNamedMetadata("opencl.kernels"); + MDN_kernels->addOperand(MDKernelNode); + + KernelMD.push_back(MDString::get(KernelM->getContext(), "kernel")); + // TODO: Replace 1 with the number of the kernel. + // Add when support for multiple launces is added + KernelMD.push_back(ValueAsMetadata::get( + ConstantInt::get(Type::getInt32Ty(KernelM->getContext()), 1))); + MDNode *MDNvvmAnnotationsNode = MDNode::get(KernelM->getContext(), KernelMD); + NamedMDNode *MDN_annotations = + KernelM->getOrInsertNamedMetadata("nvvm.annotations"); + MDN_annotations->addOperand(MDNvvmAnnotationsNode); +} - SmallVector<Metadata*,8> KernelMD; - KernelMD.push_back(ValueAsMetadata::get(F)); +void CGT_NVPTX::writeKernelsModule() { - // TODO: There is additional metadata used by kernel files but we skip them as - // they are not mandatory. In future they might be useful to enable - // optimizations + // In addition to deleting all other functions, we also want to spiff it + // up a little bit. Do this now. + legacy::PassManager Passes; - MDTuple *MDKernelNode = MDNode::get(KernelM->getContext(), KernelMD); - NamedMDNode *MDN_kernels = KernelM->getOrInsertNamedMetadata("opencl.kernels"); - MDN_kernels->addOperand(MDKernelNode); + DEBUG(errs() << "Writing to File --- "); + DEBUG(errs() << getKernelsModuleName(M).c_str() << "\n"); + std::error_code EC; + ToolOutputFile Out(getKernelsModuleName(M).c_str(), EC, sys::fs::F_None); + if (EC) { + DEBUG(errs() << EC.message() << '\n'); + } - KernelMD.push_back(MDString::get(KernelM->getContext(), "kernel")); - // TODO: Replace 1 with the number of the kernel. 
- // Add when support for multiple launces is added - KernelMD.push_back(ValueAsMetadata::get(ConstantInt::get(Type::getInt32Ty(KernelM->getContext()),1))); - MDNode *MDNvvmAnnotationsNode = MDNode::get(KernelM->getContext(), KernelMD); - NamedMDNode *MDN_annotations = KernelM->getOrInsertNamedMetadata("nvvm.annotations"); - MDN_annotations->addOperand(MDNvvmAnnotationsNode); + Passes.add(createPrintModulePass(Out.os())); + Passes.run(*KernelM); + + // Declare success. + Out.keep(); } -void CGT_NVPTX::writeKernelsModule() { +Function *CGT_NVPTX::transformFunctionToVoid(Function *F) { - // In addition to deleting all other functions, we also want to spiff it - // up a little bit. Do this now. - legacy::PassManager Passes; + DEBUG(errs() << "Transforming function to void: " << F->getName() << "\n"); + // FIXME: Maybe do that using the Node? + StructType *FRetTy = dyn_cast<StructType>(F->getReturnType()); + assert(FRetTy && "Return Type must always be a struct"); - errs() << "Writing to File --- "; - errs() << getKernelsModuleName(M).c_str() << "\n"; - std::error_code EC; - ToolOutputFile Out(getKernelsModuleName(M).c_str(), EC, sys::fs::F_None); - if (EC) { - errs() << EC.message() << '\n'; - } + // Keeps return statements, because we will need to replace them + std::vector<ReturnInst *> RItoRemove; + findReturnInst(F, RItoRemove); - Passes.add( - createPrintModulePass(Out.os())); + std::vector<Type *> RetArgTypes; + std::vector<Argument *> RetArgs; + std::vector<Argument *> Args; + // Check for { } return struct, which means that the function returns void + if (FRetTy->isEmptyTy()) { - Passes.run(*KernelM); + DEBUG(errs() << "\tFunction output struct is void\n"); + DEBUG(errs() << "\tNo parameters added\n"); - // Declare success. 
- Out.keep(); -} + // Replacing return statements with others returning void + for (auto *RI : RItoRemove) { + ReturnInst::Create((F->getContext()), 0, RI); + RI->eraseFromParent(); + } + DEBUG(errs() << "\tChanged return statements to return void\n"); + } else { + // The struct has return values, thus needs to be converted to parameter + + // Iterate over all element types of return struct and add arguments to the + // function + for (unsigned i = 0; i < FRetTy->getNumElements(); i++) { + Argument *RetArg = + new Argument(FRetTy->getElementType(i)->getPointerTo(), "ret_arg", F); + RetArgs.push_back(RetArg); + RetArgTypes.push_back(RetArg->getType()); + DEBUG(errs() << "\tCreated parameter: " << *RetArg << "\n"); + } -Function* CGT_NVPTX::transformFunctionToVoid(Function* F) { - - DEBUG(errs() << "Transforming function to void: " << F->getName() << "\n"); - // FIXME: Maybe do that using the Node? - StructType* FRetTy = dyn_cast<StructType>(F->getReturnType()); - assert(FRetTy && "Return Type must always be a struct"); - - // Keeps return statements, because we will need to replace them - std::vector<ReturnInst *> RItoRemove; - findReturnInst(F, RItoRemove); - - std::vector<Type *> RetArgTypes; - std::vector<Argument*> RetArgs; - std::vector<Argument*> Args; - // Check for { } return struct, which means that the function returns void - if (FRetTy->isEmptyTy()) { - - DEBUG(errs() << "\tFunction output struct is void\n"); - DEBUG(errs() << "\tNo parameters added\n"); - - // Replacing return statements with others returning void - for (auto *RI : RItoRemove) { - ReturnInst::Create((F->getContext()), 0, RI); - RI->eraseFromParent(); - } - DEBUG(errs() << "\tChanged return statements to return void\n"); - } - else { - // The struct has return values, thus needs to be converted to parameter - - // Iterate over all element types of return struct and add arguments to the - // function - for (unsigned i=0; i<FRetTy->getNumElements(); i++) { - Argument* RetArg = new 
Argument(FRetTy->getElementType(i)->getPointerTo(), "ret_arg", F); - RetArgs.push_back(RetArg); - RetArgTypes.push_back(RetArg->getType()); - DEBUG(errs() << "\tCreated parameter: " << *RetArg << "\n"); - } - - DEBUG(errs() << "\tReplacing Return statements\n"); - // Replace return statements with extractValue and store instructions - for (auto *RI : RItoRemove) { - Value* RetVal = RI->getReturnValue(); - for(unsigned i = 0; i < RetArgs.size(); i++) { - ExtractValueInst* EI = ExtractValueInst::Create(RetVal, ArrayRef<unsigned>(i), - RetArgs[i]->getName()+".val", RI); - new StoreInst(EI, RetArgs[i], RI); - } - // assert(RetVal && "Return value should not be null at this point"); - // StructType* RetType = cast<StructType>(RetVal->getType()); - // assert(RetType && "Return type is not a struct"); - - ReturnInst::Create((F->getContext()), 0, RI); - RI->eraseFromParent(); - - } - } - DEBUG(errs() << "\tReplaced return statements\n"); - - // Create the argument type list with the added argument's type - std::vector<Type*> ArgTypes; - for(Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); - ai != ae; ++ai) { - ArgTypes.push_back(ai->getType()); - } - for(auto *RATy: RetArgTypes) { - ArgTypes.push_back(RATy); - } - - // Creating Args vector to use in cloning! - for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); - ai != ae; ++ai) { - Args.push_back(&*ai); - } - for(auto *ai : RetArgs) { - Args.push_back(ai); - } - - // Adding new arguments to the function argument list, would not change the - // function type. 
We need to change the type of this function to reflect the - // added arguments - Type* VoidRetType = Type::getVoidTy(F->getContext()); - FunctionType* newFT = FunctionType::get(VoidRetType, ArgTypes, F->isVarArg()); - - // Change the function type - //F->mutateType(PTy); - Function* newF = cloneFunction(F, newFT, false, NULL, &Args); - replaceNodeFunctionInIR(*F->getParent(), F, newF); - //F->eraseFromParent(); - return newF; + DEBUG(errs() << "\tReplacing Return statements\n"); + // Replace return statements with extractValue and store instructions + for (auto *RI : RItoRemove) { + Value *RetVal = RI->getReturnValue(); + for (unsigned i = 0; i < RetArgs.size(); i++) { + ExtractValueInst *EI = ExtractValueInst::Create( + RetVal, ArrayRef<unsigned>(i), RetArgs[i]->getName() + ".val", RI); + new StoreInst(EI, RetArgs[i], RI); + } + // assert(RetVal && "Return value should not be null at this point"); + // StructType* RetType = cast<StructType>(RetVal->getType()); + // assert(RetType && "Return type is not a struct"); + + ReturnInst::Create((F->getContext()), 0, RI); + RI->eraseFromParent(); + } + } + DEBUG(errs() << "\tReplaced return statements\n"); + + // Create the argument type list with the added argument's type + std::vector<Type *> ArgTypes; + for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); + ai != ae; ++ai) { + ArgTypes.push_back(ai->getType()); + } + for (auto *RATy : RetArgTypes) { + ArgTypes.push_back(RATy); + } + + // Creating Args vector to use in cloning! + for (Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae; + ++ai) { + Args.push_back(&*ai); + } + for (auto *ai : RetArgs) { + Args.push_back(ai); + } + + // Adding new arguments to the function argument list, would not change the + // function type. 
We need to change the type of this function to reflect the + // added arguments + Type *VoidRetType = Type::getVoidTy(F->getContext()); + FunctionType *newFT = FunctionType::get(VoidRetType, ArgTypes, F->isVarArg()); + + // Change the function type + // F->mutateType(PTy); + Function *newF = cloneFunction(F, newFT, false, NULL, &Args); + replaceNodeFunctionInIR(*F->getParent(), F, newF); + // F->eraseFromParent(); + return newF; } /****************************************************************************** @@ -2102,314 +2130,332 @@ Function* CGT_NVPTX::transformFunctionToVoid(Function* F) { // 1. No stores // 2. Loads not dependent on getNodeInstanceID itrinsic -static bool findLoadStoreUses(Value* V, std::vector<Value*>*UseList, std::vector<Value*>*VisitedList) { - if(std::find(VisitedList->begin(), VisitedList->end(), V) != VisitedList->end()) { - DEBUG(errs() << "\tAlready visited value: " << *V << "\n"); - return false; - } - VisitedList->push_back(V); - for(Value::user_iterator ui = V->user_begin(), ue = V->user_end(); - ui != ue; ++ui) { - Instruction* I = dyn_cast<Instruction>(*ui); - if(!I) { - // if use is not an instruction, then skip it - continue; - } - DEBUG(errs() << "\t" << *I << "\n"); - if(isa<LoadInst>(I)) { - DEBUG(errs() << "\tFound load instruction: " << *I << "\n"); - DEBUG(errs() << "\tAdd to use list: " << *V << "\n"); - UseList->push_back(V); - } - else if(isa<StoreInst>(I) || isa<AtomicRMWInst>(I)) { - // found a store in use chain - DEBUG(errs() << "Found store/atomicrmw instruction: " << *I << "\n"); - return true; - } - else if(BuildDFG::isViscIntrinsic(I)) { - // If it is an atomic intrinsic, we found a store - IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); - assert(II && II->getCalledValue()->getName().startswith("llvm.visc.atomic") - && "Only visc atomic intrinsics can have an argument as input"); - return true; - } - else { - DEBUG(errs() << "\tTraverse use chain of: " << *I << "\n"); - if(findLoadStoreUses(I, UseList, 
VisitedList)) - return true; - } - } - return false; +static bool findLoadStoreUses(Value *V, std::vector<Value *> *UseList, + std::vector<Value *> *VisitedList) { + if (std::find(VisitedList->begin(), VisitedList->end(), V) != + VisitedList->end()) { + DEBUG(errs() << "\tAlready visited value: " << *V << "\n"); + return false; + } + VisitedList->push_back(V); + for (Value::user_iterator ui = V->user_begin(), ue = V->user_end(); ui != ue; + ++ui) { + Instruction *I = dyn_cast<Instruction>(*ui); + if (!I) { + // if use is not an instruction, then skip it + continue; + } + DEBUG(errs() << "\t" << *I << "\n"); + if (isa<LoadInst>(I)) { + DEBUG(errs() << "\tFound load instruction: " << *I << "\n"); + DEBUG(errs() << "\tAdd to use list: " << *V << "\n"); + UseList->push_back(V); + } else if (isa<StoreInst>(I) || isa<AtomicRMWInst>(I)) { + // found a store in use chain + DEBUG(errs() << "Found store/atomicrmw instruction: " << *I << "\n"); + return true; + } else if (BuildDFG::isHPVMIntrinsic(I)) { + // If it is an atomic intrinsic, we found a store + IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); + assert(II && + II->getCalledValue()->getName().startswith("llvm.hpvm.atomic") && + "Only hpvm atomic intrinsics can have an argument as input"); + return true; + } else { + DEBUG(errs() << "\tTraverse use chain of: " << *I << "\n"); + if (findLoadStoreUses(I, UseList, VisitedList)) + return true; + } + } + return false; } -static bool isDependentOnNodeInstanceID(Value* V, std::vector<Value*>*DependenceList) { - if(std::find(DependenceList->begin(), DependenceList->end(), V) != DependenceList->end()) { - DEBUG(errs() << "\tAlready visited value: " << *V << "\n"); - return false; - } - DependenceList->push_back(V); - // If not an instruction, then not dependent on node instance id - if(!isa<Instruction>(V) || isa<Constant>(V)) { - DEBUG(errs() << "\tStop\n"); - return false; - } - - Instruction* I = cast<Instruction>(V); - for(unsigned i = 0; i < I->getNumOperands(); i++) { - 
Value* operand = I->getOperand(i); - if(IntrinsicInst* II = dyn_cast<IntrinsicInst>(operand)) { - if((II->getIntrinsicID() == Intrinsic::visc_getNodeInstanceID_x - || II->getIntrinsicID() == Intrinsic::visc_getNodeInstanceID_y - || II->getIntrinsicID() == Intrinsic::visc_getNodeInstanceID_z)) { - Value* Node = II->getArgOperand(0); - IntrinsicInst* GN = dyn_cast<IntrinsicInst>(Node); - assert(GN && "NodeInstanceID operande should be node/parent node intrinsic\n"); - if(GN->getIntrinsicID() == Intrinsic::visc_getNode) { - DEBUG(errs() << "\tDependency found on Node instance ID: " << *II << "\n"); - return true; - } - } - } - if(CmpInst* CI = dyn_cast<CmpInst>(operand)) { - DEBUG(errs() << "Found compare instruction: "<< *CI<<"\nNot following its dependency list\n"); - continue; - } - DEBUG( errs() << "\tTraverse the operand chain of: " << *operand << "\n"); - if(isDependentOnNodeInstanceID(operand, DependenceList)) { - return true; - } - } - return false; +static bool isDependentOnNodeInstanceID(Value *V, + std::vector<Value *> *DependenceList) { + if (std::find(DependenceList->begin(), DependenceList->end(), V) != + DependenceList->end()) { + DEBUG(errs() << "\tAlready visited value: " << *V << "\n"); + return false; + } + DependenceList->push_back(V); + // If not an instruction, then not dependent on node instance id + if (!isa<Instruction>(V) || isa<Constant>(V)) { + DEBUG(errs() << "\tStop\n"); + return false; + } + + Instruction *I = cast<Instruction>(V); + for (unsigned i = 0; i < I->getNumOperands(); i++) { + Value *operand = I->getOperand(i); + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(operand)) { + if ((II->getIntrinsicID() == Intrinsic::hpvm_getNodeInstanceID_x || + II->getIntrinsicID() == Intrinsic::hpvm_getNodeInstanceID_y || + II->getIntrinsicID() == Intrinsic::hpvm_getNodeInstanceID_z)) { + Value *Node = II->getArgOperand(0); + IntrinsicInst *GN = dyn_cast<IntrinsicInst>(Node); + assert( + GN && + "NodeInstanceID operande should be node/parent 
node intrinsic\n"); + if (GN->getIntrinsicID() == Intrinsic::hpvm_getNode) { + DEBUG(errs() << "\tDependency found on Node instance ID: " << *II + << "\n"); + return true; + } + } + } + if (CmpInst *CI = dyn_cast<CmpInst>(operand)) { + DEBUG(errs() << "Found compare instruction: " << *CI + << "\nNot following its dependency list\n"); + continue; + } + DEBUG(errs() << "\tTraverse the operand chain of: " << *operand << "\n"); + if (isDependentOnNodeInstanceID(operand, DependenceList)) { + return true; + } + } + return false; } // Function to check if argument arg can be changed to a constant memory pointer -static bool canBePromoted(Argument* arg, Function* F) { - DEBUG(errs() << "OPT: Check if Argument " << *arg << " can be changed to constant memory\n"); - std::vector<Value*> UseList; - std::vector<Value*> VisitedList; - // recursively traverse use chain - // if find a store instruction return false, everything fails, cannot be - // promoted - // if find a load instruction as use, add the GEP instruction to list - bool foundStore = findLoadStoreUses(arg, &UseList, &VisitedList); - if(foundStore == true) - return false; - // See that the GEP instructions are not dependent on getNodeInstanceID - // intrinsic - DEBUG(errs() << foundStore << "\tNo Store Instruction found. 
Check dependence on node instance ID\n"); - std::vector<Value*>DependenceList; - for(auto U: UseList) { - if(isDependentOnNodeInstanceID(U, &DependenceList)) - return false; - } - DEBUG(errs() << "\tYes, Promotable to Constant Memory\n"); - return true; +static bool canBePromoted(Argument *arg, Function *F) { + DEBUG(errs() << "OPT: Check if Argument " << *arg + << " can be changed to constant memory\n"); + std::vector<Value *> UseList; + std::vector<Value *> VisitedList; + // recursively traverse use chain + // if find a store instruction return false, everything fails, cannot be + // promoted + // if find a load instruction as use, add the GEP instruction to list + bool foundStore = findLoadStoreUses(arg, &UseList, &VisitedList); + if (foundStore == true) + return false; + // See that the GEP instructions are not dependent on getNodeInstanceID + // intrinsic + DEBUG(errs() << foundStore + << "\tNo Store Instruction found. Check dependence on node " + "instance ID\n"); + std::vector<Value *> DependenceList; + for (auto U : UseList) { + if (isDependentOnNodeInstanceID(U, &DependenceList)) + return false; + } + DEBUG(errs() << "\tYes, Promotable to Constant Memory\n"); + return true; } - // Calculate execute node parameters which include, number of diemnsions for // dynamic instances of the kernel, local and global work group sizes. 
-static void getExecuteNodeParams(Module &M, Value* &workDim, Value* &LocalWGPtr, Value* - &GlobalWGPtr, Kernel* kernel, ValueToValueMapTy& VMap, Instruction* IB) { - - // Assign number of dimenstions a constant value - workDim = ConstantInt::get(Type::getInt32Ty(M.getContext()), kernel->gridDim); - - // If local work group size if null - if(!kernel->hasLocalWG()) { - LocalWGPtr = Constant::getNullValue(Type::getInt64PtrTy(M.getContext())); - } - else { - for(unsigned i = 0; i < kernel->localWGSize.size(); i++) { - if(isa<Argument>(kernel->localWGSize[i])) - kernel->localWGSize[i] = VMap[kernel->localWGSize[i]]; - } - LocalWGPtr = genWorkGroupPtr(M, kernel->localWGSize, VMap, IB, "LocalWGSize"); - } - - for(unsigned i = 0; i < kernel->globalWGSize.size(); i++) { - if(isa<Argument>(kernel->globalWGSize[i])) - kernel->globalWGSize[i] = VMap[kernel->globalWGSize[i]]; - } - - // For OpenCL, global work group size is the total bumber of instances in each - // dimension. So, multiply local and global dim limits. 
- std::vector<Value*> globalWGSizeInsts; - if(kernel->hasLocalWG()) { - for (unsigned i = 0; i < kernel->gridDim; i++) { - BinaryOperator* MulInst = BinaryOperator::Create(Instruction::Mul, kernel->globalWGSize[i], kernel->localWGSize[i], "", IB); - globalWGSizeInsts.push_back(MulInst); - } - } - else { - globalWGSizeInsts = kernel->globalWGSize; - } - GlobalWGPtr = genWorkGroupPtr(M, globalWGSizeInsts, VMap, IB, "GlobalWGSize"); - DEBUG(errs() << "Pointer to global work group: " << *GlobalWGPtr << "\n"); +static void getExecuteNodeParams(Module &M, Value *&workDim, Value *&LocalWGPtr, + Value *&GlobalWGPtr, Kernel *kernel, + ValueToValueMapTy &VMap, Instruction *IB) { + + // Assign number of dimenstions a constant value + workDim = ConstantInt::get(Type::getInt32Ty(M.getContext()), kernel->gridDim); + + // If local work group size if null + if (!kernel->hasLocalWG()) { + LocalWGPtr = Constant::getNullValue(Type::getInt64PtrTy(M.getContext())); + } else { + for (unsigned i = 0; i < kernel->localWGSize.size(); i++) { + if (isa<Argument>(kernel->localWGSize[i])) + kernel->localWGSize[i] = VMap[kernel->localWGSize[i]]; + } + LocalWGPtr = + genWorkGroupPtr(M, kernel->localWGSize, VMap, IB, "LocalWGSize"); + } + + for (unsigned i = 0; i < kernel->globalWGSize.size(); i++) { + if (isa<Argument>(kernel->globalWGSize[i])) + kernel->globalWGSize[i] = VMap[kernel->globalWGSize[i]]; + } + + // For OpenCL, global work group size is the total bumber of instances in each + // dimension. So, multiply local and global dim limits. 
+ std::vector<Value *> globalWGSizeInsts; + if (kernel->hasLocalWG()) { + for (unsigned i = 0; i < kernel->gridDim; i++) { + BinaryOperator *MulInst = + BinaryOperator::Create(Instruction::Mul, kernel->globalWGSize[i], + kernel->localWGSize[i], "", IB); + globalWGSizeInsts.push_back(MulInst); + } + } else { + globalWGSizeInsts = kernel->globalWGSize; + } + GlobalWGPtr = genWorkGroupPtr(M, globalWGSizeInsts, VMap, IB, "GlobalWGSize"); + DEBUG(errs() << "Pointer to global work group: " << *GlobalWGPtr << "\n"); } // CodeGen for allocating space for Work Group on stack and returning a pointer // to its address -static Value* genWorkGroupPtr(Module &M, std::vector<Value*> WGSize, ValueToValueMapTy& VMap, Instruction* IB, const Twine& WGName) { - Value* WGPtr; - // Get int64_t and or ease of use - Type* Int64Ty = Type::getInt64Ty(M.getContext()); - - // Work Group type is [#dim x i64] - Type* WGTy = ArrayType::get(Int64Ty, WGSize.size()); - // Allocate space of Global work group data on stack and get pointer to - // first element. - AllocaInst* WG = new AllocaInst(WGTy, 0, WGName, IB); - WGPtr = BitCastInst::CreatePointerCast(WG, Int64Ty->getPointerTo(), WG->getName()+".0", IB); - Value* nextDim = WGPtr; - DEBUG(errs() << *WGPtr << "\n"); - - // Iterate over the number of dimensions and store the global work group - // size in that dimension - for(unsigned i=0; i < WGSize.size(); i++) { - DEBUG(errs() << *WGSize[i] << "\n"); - assert(WGSize[i]->getType()->isIntegerTy() && "Dimension not an integer type!"); - - if(WGSize[i]->getType() != Int64Ty) { - // If number of dimensions are mentioned in any other integer format, - // generate code to extend it to i64. We need to use the mapped value in - // the new generated function, hence the use of VMap - // FIXME: Why are we changing the kernel WGSize vector here? - DEBUG(errs() << "Not i64. 
Zero extend required.\n"); - DEBUG(errs() << *WGSize[i] << "\n"); - CastInst* CI = BitCastInst::CreateIntegerCast(WGSize[i], Int64Ty, true, "", IB); - DEBUG(errs() << "Bitcast done.\n"); - StoreInst* SI = new StoreInst(CI, nextDim, IB); - DEBUG(errs() << "Zero extend done.\n"); - DEBUG(errs() << "\tZero extended work group size: " << *SI << "\n"); - } else { - // Store the value representing work group size in ith dimension on - // stack - StoreInst* SI = new StoreInst(WGSize[i], nextDim, IB); - - DEBUG(errs() << "\t Work group size: " << *SI << "\n"); - } - if(i+1 < WGSize.size()) { - // Move to next dimension - GetElementPtrInst* GEP = GetElementPtrInst::Create(nullptr, nextDim, - ArrayRef<Value*>(ConstantInt::get(Int64Ty, 1)), - WG->getName()+"."+Twine(i+1), - IB); - DEBUG(errs() << "\tPointer to next dimension on stack: " << *GEP << "\n"); - nextDim = GEP; - } - } - return WGPtr; +static Value *genWorkGroupPtr(Module &M, std::vector<Value *> WGSize, + ValueToValueMapTy &VMap, Instruction *IB, + const Twine &WGName) { + Value *WGPtr; + // Get int64_t and or ease of use + Type *Int64Ty = Type::getInt64Ty(M.getContext()); + + // Work Group type is [#dim x i64] + Type *WGTy = ArrayType::get(Int64Ty, WGSize.size()); + // Allocate space of Global work group data on stack and get pointer to + // first element. + AllocaInst *WG = new AllocaInst(WGTy, 0, WGName, IB); + WGPtr = BitCastInst::CreatePointerCast(WG, Int64Ty->getPointerTo(), + WG->getName() + ".0", IB); + Value *nextDim = WGPtr; + DEBUG(errs() << *WGPtr << "\n"); + + // Iterate over the number of dimensions and store the global work group + // size in that dimension + for (unsigned i = 0; i < WGSize.size(); i++) { + DEBUG(errs() << *WGSize[i] << "\n"); + assert(WGSize[i]->getType()->isIntegerTy() && + "Dimension not an integer type!"); + + if (WGSize[i]->getType() != Int64Ty) { + // If number of dimensions are mentioned in any other integer format, + // generate code to extend it to i64. 
We need to use the mapped value in + // the new generated function, hence the use of VMap + // FIXME: Why are we changing the kernel WGSize vector here? + DEBUG(errs() << "Not i64. Zero extend required.\n"); + DEBUG(errs() << *WGSize[i] << "\n"); + CastInst *CI = + BitCastInst::CreateIntegerCast(WGSize[i], Int64Ty, true, "", IB); + DEBUG(errs() << "Bitcast done.\n"); + StoreInst *SI = new StoreInst(CI, nextDim, IB); + DEBUG(errs() << "Zero extend done.\n"); + DEBUG(errs() << "\tZero extended work group size: " << *SI << "\n"); + } else { + // Store the value representing work group size in ith dimension on + // stack + StoreInst *SI = new StoreInst(WGSize[i], nextDim, IB); + DEBUG(errs() << "\t Work group size: " << *SI << "\n"); + } + if (i + 1 < WGSize.size()) { + // Move to next dimension + GetElementPtrInst *GEP = GetElementPtrInst::Create( + nullptr, nextDim, ArrayRef<Value *>(ConstantInt::get(Int64Ty, 1)), + WG->getName() + "." + Twine(i + 1), IB); + DEBUG(errs() << "\tPointer to next dimension on stack: " << *GEP << "\n"); + nextDim = GEP; + } + } + return WGPtr; } // Get generated PTX binary name -static std::string getPTXFilename(const Module& M) { - std::string moduleID = M.getModuleIdentifier(); - moduleID.append(".kernels.cl"); - return moduleID; +static std::string getPTXFilename(const Module &M) { + std::string moduleID = M.getModuleIdentifier(); + moduleID.append(".kernels.cl"); + return moduleID; } // Get the name of the input file from module ID -static std::string getFilenameFromModule(const Module& M) { - std::string moduleID = M.getModuleIdentifier(); - return moduleID.substr(moduleID.find_last_of("/")+1); +static std::string getFilenameFromModule(const Module &M) { + std::string moduleID = M.getModuleIdentifier(); + return moduleID.substr(moduleID.find_last_of("/") + 1); } // Changes the data layout of the Module to be compiled with NVPTX backend // TODO: Figure out when to call it, probably after duplicating the modules static void 
changeDataLayout(Module &M) { - std::string nvptx32_layoutStr = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"; - std::string nvptx64_layoutStr = "e-i64:64-v16:16-v32:32-n16:32:64"; + std::string nvptx32_layoutStr = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"; + std::string nvptx64_layoutStr = "e-i64:64-v16:16-v32:32-n16:32:64"; - if (TARGET_PTX == 32) - M.setDataLayout(StringRef(nvptx32_layoutStr)); - else if (TARGET_PTX == 64) - M.setDataLayout(StringRef(nvptx64_layoutStr)); - else assert(false && "Invalid PTX target"); + if (TARGET_PTX == 32) + M.setDataLayout(StringRef(nvptx32_layoutStr)); + else if (TARGET_PTX == 64) + M.setDataLayout(StringRef(nvptx64_layoutStr)); + else + assert(false && "Invalid PTX target"); - return; + return; } static void changeTargetTriple(Module &M) { - std::string nvptx32_TargetTriple = "nvptx--nvidiacl"; - std::string nvptx64_TargetTriple = "nvptx64--nvidiacl"; + std::string nvptx32_TargetTriple = "nvptx--nvidiacl"; + std::string nvptx64_TargetTriple = "nvptx64--nvidiacl"; - if (TARGET_PTX == 32) - M.setTargetTriple(StringRef(nvptx32_TargetTriple)); - else if (TARGET_PTX == 64) - M.setTargetTriple(StringRef(nvptx64_TargetTriple)); - else assert(false && "Invalid PTX target"); + if (TARGET_PTX == 32) + M.setTargetTriple(StringRef(nvptx32_TargetTriple)); + else if (TARGET_PTX == 64) + M.setTargetTriple(StringRef(nvptx64_TargetTriple)); + else + assert(false && "Invalid PTX target"); - return; + return; } // Helper function, populate a vector with all return statements in a function -static void findReturnInst(Function* F, std::vector<ReturnInst *> & ReturnInstVec) { - for (auto &BB : *F) { - if(auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) - ReturnInstVec.push_back(RI); - } +static void findReturnInst(Function *F, + std::vector<ReturnInst *> &ReturnInstVec) { + for (auto &BB : *F) { + if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) + ReturnInstVec.push_back(RI); + } } -// Helper function, populate a vector with all 
IntrinsicID intrinsics in a function -static void findIntrinsicInst(Function* F, Intrinsic::ID IntrinsicID, std::vector<IntrinsicInst *> & IntrinsicInstVec) { - for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { - Instruction *I = &(*i); - IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); - if (II && II->getIntrinsicID() == IntrinsicID) { - IntrinsicInstVec.push_back(II); - } - } +// Helper function, populate a vector with all IntrinsicID intrinsics in a +// function +static void findIntrinsicInst(Function *F, Intrinsic::ID IntrinsicID, + std::vector<IntrinsicInst *> &IntrinsicInstVec) { + for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { + Instruction *I = &(*i); + IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); + if (II && II->getIntrinsicID() == IntrinsicID) { + IntrinsicInstVec.push_back(II); + } + } } -// Helper funtion, returns the atomicrmw op, corresponding to intrinsic atomic op +// Helper funtion, returns the atomicrmw op, corresponding to intrinsic atomic +// op static AtomicRMWInst::BinOp getAtomicOp(Intrinsic::ID ID) { - switch(ID) { - case Intrinsic::visc_atomic_add: - return AtomicRMWInst::Add; - case Intrinsic::visc_atomic_sub: - return AtomicRMWInst::Sub; - case Intrinsic::visc_atomic_min: - return AtomicRMWInst::Min; - case Intrinsic::visc_atomic_max: - return AtomicRMWInst::Max; - case Intrinsic::visc_atomic_xchg: - return AtomicRMWInst::Xchg; - case Intrinsic::visc_atomic_and: - return AtomicRMWInst::And; - case Intrinsic::visc_atomic_or: - return AtomicRMWInst::Or; - case Intrinsic::visc_atomic_xor: - return AtomicRMWInst::Xor; - default: - llvm_unreachable("Unsupported atomic intrinsic!"); - }; + switch (ID) { + case Intrinsic::hpvm_atomic_add: + return AtomicRMWInst::Add; + case Intrinsic::hpvm_atomic_sub: + return AtomicRMWInst::Sub; + case Intrinsic::hpvm_atomic_min: + return AtomicRMWInst::Min; + case Intrinsic::hpvm_atomic_max: + return AtomicRMWInst::Max; + case Intrinsic::hpvm_atomic_xchg: + return 
AtomicRMWInst::Xchg; + case Intrinsic::hpvm_atomic_and: + return AtomicRMWInst::And; + case Intrinsic::hpvm_atomic_or: + return AtomicRMWInst::Or; + case Intrinsic::hpvm_atomic_xor: + return AtomicRMWInst::Xor; + default: + llvm_unreachable("Unsupported atomic intrinsic!"); + }; } - // Helper funtion, returns the OpenCL function name, corresponding to atomic op static std::string getAtomicOpName(Intrinsic::ID ID) { - switch(ID) { - case Intrinsic::visc_atomic_add: - return "atom_add"; - case Intrinsic::visc_atomic_sub: - return "atom_sub"; - case Intrinsic::visc_atomic_min: - return "atom_min"; - case Intrinsic::visc_atomic_max: - return "atom_max"; - case Intrinsic::visc_atomic_xchg: - return "atom_xchg"; - case Intrinsic::visc_atomic_and: - return "atom_and"; - case Intrinsic::visc_atomic_or: - return "atom_or"; - case Intrinsic::visc_atomic_xor: - return "atom_xor"; - default: - llvm_unreachable("Unsupported atomic intrinsic!"); - }; + switch (ID) { + case Intrinsic::hpvm_atomic_add: + return "atom_add"; + case Intrinsic::hpvm_atomic_sub: + return "atom_sub"; + case Intrinsic::hpvm_atomic_min: + return "atom_min"; + case Intrinsic::hpvm_atomic_max: + return "atom_max"; + case Intrinsic::hpvm_atomic_xchg: + return "atom_xchg"; + case Intrinsic::hpvm_atomic_and: + return "atom_and"; + case Intrinsic::hpvm_atomic_or: + return "atom_or"; + case Intrinsic::hpvm_atomic_xor: + return "atom_xor"; + default: + llvm_unreachable("Unsupported atomic intrinsic!"); + }; } } // End of namespace @@ -2420,4 +2466,3 @@ static RegisterPass<DFG2LLVM_NVPTX> X("dfg2llvm-nvptx", false /* does not modify the CFG */, true /* transformation, * * not just analysis */); - diff --git a/hpvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp b/hpvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp index 6498b46cd9a56ad69df35d4497b463b9dda98c87..21adabf4ebe5999134491f163aa8119d44f84f10 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp +++ b/hpvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp 
@@ -8,34 +8,33 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "DFG2LLVM_X86" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" +#include "SupportHPVM/DFG2LLVM.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/InstIterator.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" +#include "llvm/Pass.h" #include "llvm/Support/SourceMgr.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Constant.h" -#include "SupportVISC/DFG2LLVM.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; using namespace builddfg; using namespace dfg2llvm; -// VISC Command line option to use timer or not -static cl::opt<bool> -VISCTimer_X86("visc-timers-x86", cl::desc("Enable visc timers")); +// HPVM Command line option to use timer or not +static cl::opt<bool> HPVMTimer_X86("hpvm-timers-x86", + cl::desc("Enable hpvm timers")); namespace { - // DFG2LLVM_X86 - The first implementation. 
struct DFG2LLVM_X86 : public DFG2LLVM { static char ID; // Pass identification, replacement for typeid - DFG2LLVM_X86() :DFG2LLVM(ID) {} + DFG2LLVM_X86() : DFG2LLVM(ID) {} private: // Member variables @@ -50,58 +49,59 @@ public: class CGT_X86 : public CodeGenTraversal { private: - //Member variables + // Member variables FunctionCallee malloc; - // VISC Runtime API - FunctionCallee llvm_visc_x86_launch; - FunctionCallee llvm_visc_x86_wait; - FunctionCallee llvm_visc_x86_argument_ptr; - - FunctionCallee llvm_visc_streamLaunch; - FunctionCallee llvm_visc_streamPush; - FunctionCallee llvm_visc_streamPop; - FunctionCallee llvm_visc_streamWait; - FunctionCallee llvm_visc_createBindInBuffer; - FunctionCallee llvm_visc_createBindOutBuffer; - FunctionCallee llvm_visc_createEdgeBuffer; - FunctionCallee llvm_visc_createLastInputBuffer; - FunctionCallee llvm_visc_createThread; - FunctionCallee llvm_visc_bufferPush; - FunctionCallee llvm_visc_bufferPop; - FunctionCallee llvm_visc_x86_dstack_push; - FunctionCallee llvm_visc_x86_dstack_pop; - FunctionCallee llvm_visc_x86_getDimLimit; - FunctionCallee llvm_visc_x86_getDimInstance; - - //Functions - std::vector<IntrinsicInst*>* getUseList(Value* LI); - Value* addLoop(Instruction* I, Value* limit, const Twine& indexName = ""); - void addWhileLoop(Instruction*, Instruction*, Instruction*, Value*); + // HPVM Runtime API + FunctionCallee llvm_hpvm_x86_launch; + FunctionCallee llvm_hpvm_x86_wait; + FunctionCallee llvm_hpvm_x86_argument_ptr; + + FunctionCallee llvm_hpvm_streamLaunch; + FunctionCallee llvm_hpvm_streamPush; + FunctionCallee llvm_hpvm_streamPop; + FunctionCallee llvm_hpvm_streamWait; + FunctionCallee llvm_hpvm_createBindInBuffer; + FunctionCallee llvm_hpvm_createBindOutBuffer; + FunctionCallee llvm_hpvm_createEdgeBuffer; + FunctionCallee llvm_hpvm_createLastInputBuffer; + FunctionCallee llvm_hpvm_createThread; + FunctionCallee llvm_hpvm_bufferPush; + FunctionCallee llvm_hpvm_bufferPop; + FunctionCallee 
llvm_hpvm_x86_dstack_push; + FunctionCallee llvm_hpvm_x86_dstack_pop; + FunctionCallee llvm_hpvm_x86_getDimLimit; + FunctionCallee llvm_hpvm_x86_getDimInstance; + + // Functions + std::vector<IntrinsicInst *> *getUseList(Value *LI); + Value *addLoop(Instruction *I, Value *limit, const Twine &indexName = ""); + void addWhileLoop(Instruction *, Instruction *, Instruction *, Value *); Instruction *addWhileLoopCounter(BasicBlock *, BasicBlock *, BasicBlock *); - Argument* getArgumentFromEnd(Function* F, unsigned offset); - Value* getInValueAt(DFNode* Child, unsigned i, Function* ParentF_X86, - Instruction* InsertBefore); - void invokeChild_X86(DFNode* C, Function* F_X86, ValueToValueMapTy &VMap, - Instruction* InsertBefore); - void invokeChild_PTX(DFNode* C, Function* F_X86, ValueToValueMapTy &VMap, - Instruction* InsertBefore); - StructType* getArgumentListStructTy(DFNode*); - Function* createFunctionFilter(DFNode* C); - void startNodeThread(DFNode*, std::vector<Value*>, DenseMap<DFEdge*, Value*>, - Value*, Value*, Instruction*); - Function* createLaunchFunction(DFInternalNode*); - + Argument *getArgumentFromEnd(Function *F, unsigned offset); + Value *getInValueAt(DFNode *Child, unsigned i, Function *ParentF_X86, + Instruction *InsertBefore); + void invokeChild_X86(DFNode *C, Function *F_X86, ValueToValueMapTy &VMap, + Instruction *InsertBefore); + void invokeChild_PTX(DFNode *C, Function *F_X86, ValueToValueMapTy &VMap, + Instruction *InsertBefore); + StructType *getArgumentListStructTy(DFNode *); + Function *createFunctionFilter(DFNode *C); + void startNodeThread(DFNode *, std::vector<Value *>, + DenseMap<DFEdge *, Value *>, Value *, Value *, + Instruction *); + Function *createLaunchFunction(DFInternalNode *); + // Virtual Functions void init() { - VISCTimer = VISCTimer_X86; + HPVMTimer = HPVMTimer_X86; TargetName = "X86"; } void initRuntimeAPI(); - void codeGen(DFInternalNode* N); - void codeGen(DFLeafNode* N); - Function* codeGenStreamPush(DFInternalNode* N); - 
Function* codeGenStreamPop(DFInternalNode* N); + void codeGen(DFInternalNode *N); + void codeGen(DFLeafNode *N); + Function *codeGenStreamPush(DFInternalNode *N); + Function *codeGenStreamPop(DFInternalNode *N); public: // Constructor @@ -110,8 +110,8 @@ public: initRuntimeAPI(); } - void codeGenLaunch(DFInternalNode* Root); - void codeGenLaunchStreaming(DFInternalNode* Root); + void codeGenLaunch(DFInternalNode *Root); + void codeGenLaunchStreaming(DFInternalNode *Root); }; bool DFG2LLVM_X86::runOnModule(Module &M) { @@ -122,8 +122,8 @@ bool DFG2LLVM_X86::runOnModule(Module &M) { // - Maps from i8* hansles to DFNode and DFEdge BuildDFG &DFG = getAnalysis<BuildDFG>(); - //DFInternalNode *Root = DFG.getRoot(); - std::vector<DFInternalNode*> Roots = DFG.getRoots(); + // DFInternalNode *Root = DFG.getRoot(); + std::vector<DFInternalNode *> Roots = DFG.getRoots(); // BuildDFG::HandleToDFNode &HandleToDFNodeMap = DFG.getHandleToDFNodeMap(); // BuildDFG::HandleToDFEdge &HandleToDFEdgeMap = DFG.getHandleToDFEdgeMap(); @@ -131,16 +131,17 @@ bool DFG2LLVM_X86::runOnModule(Module &M) { CGT_X86 *CGTVisitor = new CGT_X86(M, DFG); // Iterate over all the DFGs and produce code for each one of them - for (auto &rootNode: Roots) { + for (auto &rootNode : Roots) { // Initiate code generation for root DFNode CGTVisitor->visit(rootNode); - // Go ahead and replace the launch intrinsic with pthread call, otherwise return now. + // Go ahead and replace the launch intrinsic with pthread call, otherwise + // return now. // TODO: Later on, we might like to do this in a separate pass, which would - // allow us the flexibility to switch between complete static code generation - // for DFG or having a customized runtime+scheduler - + // allow us the flexibility to switch between complete static code + // generation for DFG or having a customized runtime+scheduler + // Do streaming code generation if root node is streaming. 
Usual otherwise - if(rootNode->isChildGraphStreaming()) + if (rootNode->isChildGraphStreaming()) CGTVisitor->codeGenLaunchStreaming(rootNode); else CGTVisitor->codeGenLaunch(rootNode); @@ -150,61 +151,61 @@ bool DFG2LLVM_X86::runOnModule(Module &M) { return true; } -// Initialize the VISC runtime API. This makes it easier to insert these calls +// Initialize the HPVM runtime API. This makes it easier to insert these calls void CGT_X86::initRuntimeAPI() { // Load Runtime API Module SMDiagnostic Err; - char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT"); + char *LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT"); assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!"); Twine llvmSrcRoot = LLVM_SRC_ROOT; - Twine runtimeAPI = llvmSrcRoot + "/../build/tools/hpvm/projects/visc-rt/visc-rt.bc"; + Twine runtimeAPI = + llvmSrcRoot + "/../build/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc"; runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext()); - if(runtimeModule == nullptr) { + if (runtimeModule == nullptr) { DEBUG(errs() << Err.getMessage() << " " << runtimeAPI << "\n"); assert(false && "couldn't parse runtime"); - } - else - DEBUG(errs() << "Successfully loaded visc-rt API module\n"); + } else + DEBUG(errs() << "Successfully loaded hpvm-rt API module\n"); // Get or insert the global declarations for launch/wait functions - DECLARE(llvm_visc_x86_launch); + DECLARE(llvm_hpvm_x86_launch); DECLARE(malloc); - DECLARE(llvm_visc_x86_wait); - DECLARE(llvm_visc_x86_argument_ptr); - DECLARE(llvm_visc_streamLaunch); - DECLARE(llvm_visc_streamPush); - DECLARE(llvm_visc_streamPop); - DECLARE(llvm_visc_streamWait); - DECLARE(llvm_visc_createBindInBuffer); - DECLARE(llvm_visc_createBindOutBuffer); - DECLARE(llvm_visc_createEdgeBuffer); - DECLARE(llvm_visc_createLastInputBuffer); - DECLARE(llvm_visc_createThread); - DECLARE(llvm_visc_bufferPush); - DECLARE(llvm_visc_bufferPop); - DECLARE(llvm_visc_x86_dstack_push); - DECLARE(llvm_visc_x86_dstack_pop); - 
DECLARE(llvm_visc_x86_getDimLimit); - DECLARE(llvm_visc_x86_getDimInstance); + DECLARE(llvm_hpvm_x86_wait); + DECLARE(llvm_hpvm_x86_argument_ptr); + DECLARE(llvm_hpvm_streamLaunch); + DECLARE(llvm_hpvm_streamPush); + DECLARE(llvm_hpvm_streamPop); + DECLARE(llvm_hpvm_streamWait); + DECLARE(llvm_hpvm_createBindInBuffer); + DECLARE(llvm_hpvm_createBindOutBuffer); + DECLARE(llvm_hpvm_createEdgeBuffer); + DECLARE(llvm_hpvm_createLastInputBuffer); + DECLARE(llvm_hpvm_createThread); + DECLARE(llvm_hpvm_bufferPush); + DECLARE(llvm_hpvm_bufferPop); + DECLARE(llvm_hpvm_x86_dstack_push); + DECLARE(llvm_hpvm_x86_dstack_pop); + DECLARE(llvm_hpvm_x86_getDimLimit); + DECLARE(llvm_hpvm_x86_getDimInstance); // Get or insert timerAPI functions as well if you plan to use timers initTimerAPI(); // Insert init context in main - Function* VI = M.getFunction("llvm.visc.init"); - assert(VI->getNumUses() == 1 && "__visc__init should only be used once"); + Function *VI = M.getFunction("llvm.hpvm.init"); + assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once"); DEBUG(errs() << "Inserting x86 timer initialization\n"); - Instruction* I = cast<Instruction>(*VI->user_begin()); + Instruction *I = cast<Instruction>(*VI->user_begin()); initializeTimerSet(I); - switchToTimer(visc_TimerID_NONE, I); - // Insert print instruction at visc exit - Function* VC = M.getFunction("llvm.visc.cleanup"); - assert(VC->getNumUses() == 1 && "__visc__cleanup should only be used once"); + switchToTimer(hpvm_TimerID_NONE, I); + // Insert print instruction at hpvm exit + Function *VC = M.getFunction("llvm.hpvm.cleanup"); + assert(VC->getNumUses() == 1 && "__hpvm__cleanup should only be used once"); DEBUG(errs() << "Inserting x86 timer print\n"); printTimerSet(I); @@ -212,12 +213,13 @@ void CGT_X86::initRuntimeAPI() { /* Returns vector of all wait instructions */ -std::vector<IntrinsicInst*>* CGT_X86::getUseList(Value* GraphID) { - std::vector<IntrinsicInst*>* UseList = new 
std::vector<IntrinsicInst*>(); +std::vector<IntrinsicInst *> *CGT_X86::getUseList(Value *GraphID) { + std::vector<IntrinsicInst *> *UseList = new std::vector<IntrinsicInst *>(); // It must have been loaded from memory somewhere - for(Value::user_iterator ui = GraphID->user_begin(), - ue = GraphID->user_end(); ui!=ue; ++ui) { - if(IntrinsicInst* waitI = dyn_cast<IntrinsicInst>(*ui)) { + for (Value::user_iterator ui = GraphID->user_begin(), + ue = GraphID->user_end(); + ui != ue; ++ui) { + if (IntrinsicInst *waitI = dyn_cast<IntrinsicInst>(*ui)) { UseList->push_back(waitI); } else { llvm_unreachable("Error: Operation on Graph ID not supported!\n"); @@ -229,14 +231,14 @@ std::vector<IntrinsicInst*>* CGT_X86::getUseList(Value* GraphID) { /* Traverse the function argument list in reverse order to get argument at a * distance offset fromt he end of argument list of function F */ -Argument* CGT_X86::getArgumentFromEnd(Function* F, unsigned offset) { - assert((F->getFunctionType()->getNumParams() >= offset && offset > 0) - && "Invalid offset to access arguments!"); +Argument *CGT_X86::getArgumentFromEnd(Function *F, unsigned offset) { + assert((F->getFunctionType()->getNumParams() >= offset && offset > 0) && + "Invalid offset to access arguments!"); Function::arg_iterator e = F->arg_end(); // Last element of argument iterator is dummy. Skip it. 
e--; - Argument* arg; - for( ; offset != 0; e--) { + Argument *arg; + for (; offset != 0; e--) { offset--; arg = &*e; } @@ -254,25 +256,24 @@ Argument* CGT_X86::getArgumentFromEnd(Function* F, unsigned offset) { * which loops over bidy if true and goes to end if false * (5) Update phi node of body */ -void CGT_X86::addWhileLoop(Instruction* CondBlockStart, Instruction* BodyStart, - Instruction* BodyEnd, Value* TerminationCond) { - BasicBlock* Entry = CondBlockStart->getParent(); - BasicBlock* CondBlock = Entry->splitBasicBlock(CondBlockStart, "condition"); - BasicBlock* WhileBody = CondBlock->splitBasicBlock(BodyStart, "while.body"); - BasicBlock* WhileEnd = WhileBody->splitBasicBlock(BodyEnd, "while.end"); +void CGT_X86::addWhileLoop(Instruction *CondBlockStart, Instruction *BodyStart, + Instruction *BodyEnd, Value *TerminationCond) { + BasicBlock *Entry = CondBlockStart->getParent(); + BasicBlock *CondBlock = Entry->splitBasicBlock(CondBlockStart, "condition"); + BasicBlock *WhileBody = CondBlock->splitBasicBlock(BodyStart, "while.body"); + BasicBlock *WhileEnd = WhileBody->splitBasicBlock(BodyEnd, "while.end"); // Replace the terminator instruction of conditional with new conditional // branch which goes to while.body if true and branches to while.end otherwise - BranchInst* BI = BranchInst::Create(WhileEnd, WhileBody, TerminationCond); + BranchInst *BI = BranchInst::Create(WhileEnd, WhileBody, TerminationCond); ReplaceInstWithInst(CondBlock->getTerminator(), BI); // While Body should jump to condition block - BranchInst* UnconditionalBranch = BranchInst::Create(CondBlock); + BranchInst *UnconditionalBranch = BranchInst::Create(CondBlock); ReplaceInstWithInst(WhileBody->getTerminator(), UnconditionalBranch); - } -Instruction* CGT_X86::addWhileLoopCounter(BasicBlock *Entry, BasicBlock *Cond, +Instruction *CGT_X86::addWhileLoopCounter(BasicBlock *Entry, BasicBlock *Cond, BasicBlock *Body) { Module *M = Entry->getParent()->getParent(); Type *Int64Ty = 
Type::getInt64Ty(M->getContext()); @@ -282,10 +283,10 @@ Instruction* CGT_X86::addWhileLoopCounter(BasicBlock *Entry, BasicBlock *Cond, PHINode *CounterPhi = PHINode::Create(Int64Ty, 2, "cnt", IB); ConstantInt *IConst = - ConstantInt::get(Type::getInt64Ty(M->getContext()), 1, true); + ConstantInt::get(Type::getInt64Ty(M->getContext()), 1, true); Instruction *CounterIncr = - BinaryOperator::CreateNSW(Instruction::BinaryOps::Add, CounterPhi, IConst, - "cnt_incr", Body->getTerminator()); + BinaryOperator::CreateNSW(Instruction::BinaryOps::Add, CounterPhi, IConst, + "cnt_incr", Body->getTerminator()); // Set incoming values for Phi node IConst = ConstantInt::get(Type::getInt64Ty(M->getContext()), 0, true); @@ -307,39 +308,40 @@ Instruction* CGT_X86::addWhileLoopCounter(BasicBlock *Entry, BasicBlock *Cond, * which loops over bidy if true and goes to end if false * (5) Update phi node of body */ -Value* CGT_X86::addLoop(Instruction* I, Value* limit, const Twine& indexName) { - BasicBlock* Entry = I->getParent(); - BasicBlock* ForBody = Entry->splitBasicBlock(I, "for.body"); +Value *CGT_X86::addLoop(Instruction *I, Value *limit, const Twine &indexName) { + BasicBlock *Entry = I->getParent(); + BasicBlock *ForBody = Entry->splitBasicBlock(I, "for.body"); BasicBlock::iterator i(I); ++i; - Instruction* NextI = &*i; + Instruction *NextI = &*i; // Next Instruction should also belong to the same basic block as the basic // block will have a terminator instruction - assert(NextI->getParent() == ForBody - && "Next Instruction should also belong to the same basic block!"); - BasicBlock* ForEnd = ForBody->splitBasicBlock(NextI, "for.end"); - + assert(NextI->getParent() == ForBody && + "Next Instruction should also belong to the same basic block!"); + BasicBlock *ForEnd = ForBody->splitBasicBlock(NextI, "for.end"); // Add Phi Node for index variable - PHINode* IndexPhi = PHINode::Create(Type::getInt64Ty(I->getContext()), - 2, "index."+indexName, I); + PHINode *IndexPhi = 
PHINode::Create(Type::getInt64Ty(I->getContext()), 2, + "index." + indexName, I); // Add incoming edge to phi IndexPhi->addIncoming(ConstantInt::get(Type::getInt64Ty(I->getContext()), 0), Entry); // Increment index variable - BinaryOperator* IndexInc = BinaryOperator::Create(Instruction::Add, - IndexPhi, ConstantInt::get(Type::getInt64Ty(I->getContext()), 1), - "index."+indexName+".inc", ForBody->getTerminator()); + BinaryOperator *IndexInc = BinaryOperator::Create( + Instruction::Add, IndexPhi, + ConstantInt::get(Type::getInt64Ty(I->getContext()), 1), + "index." + indexName + ".inc", ForBody->getTerminator()); // Compare index variable with limit - CmpInst* Cond = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, IndexInc, - limit, "cond."+indexName, ForBody->getTerminator()); + CmpInst *Cond = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, IndexInc, limit, + "cond." + indexName, ForBody->getTerminator()); // Replace the terminator instruction of for.body with new conditional // branch which loops over body if true and branches to for.end otherwise - BranchInst* BI = BranchInst::Create(ForBody, ForEnd, Cond); + BranchInst *BI = BranchInst::Create(ForBody, ForEnd, Cond); ReplaceInstWithInst(ForBody->getTerminator(), BI); // Add incoming edge to phi node in body @@ -351,260 +353,274 @@ Value* CGT_X86::addLoop(Instruction* I, Value* limit, const Twine& indexName) { // types, output types and isLastInput buffer type. All the streaming // inputs/outputs are converted to i8*, since this is the type of buffer // handles. 
-StructType* CGT_X86::getArgumentListStructTy(DFNode* C) { - std::vector<Type*> TyList; +StructType *CGT_X86::getArgumentListStructTy(DFNode *C) { + std::vector<Type *> TyList; // Input types - Function* CF = C->getFuncPointer(); - for(Function::arg_iterator ai = CF->arg_begin(), ae = CF->arg_end(); - ai != ae; ++ai) { - if(C->getInDFEdgeAt(ai->getArgNo())->isStreamingEdge()) + Function *CF = C->getFuncPointer(); + for (Function::arg_iterator ai = CF->arg_begin(), ae = CF->arg_end(); + ai != ae; ++ai) { + if (C->getInDFEdgeAt(ai->getArgNo())->isStreamingEdge()) TyList.push_back(Type::getInt8PtrTy(CF->getContext())); - else + else TyList.push_back(ai->getType()); } // Output Types - StructType* OutStructTy = cast<StructType>(CF->getReturnType()); + StructType *OutStructTy = cast<StructType>(CF->getReturnType()); for (unsigned i = 0; i < OutStructTy->getNumElements(); i++) { // All outputs of a node are streaming edge - assert(C->getOutDFEdgeAt(i)->isStreamingEdge() - && "All output edges of child node have to be streaming"); + assert(C->getOutDFEdgeAt(i)->isStreamingEdge() && + "All output edges of child node have to be streaming"); TyList.push_back(Type::getInt8PtrTy(CF->getContext())); } // isLastInput buffer element TyList.push_back(Type::getInt8PtrTy(CF->getContext())); - StructType* STy = StructType::create(CF->getContext(), TyList, - Twine("struct.thread."+CF->getName()).str(), true); + StructType *STy = + StructType::create(CF->getContext(), TyList, + Twine("struct.thread." 
+ CF->getName()).str(), true); return STy; - } -void CGT_X86::startNodeThread(DFNode* C, std::vector<Value*> Args, DenseMap<DFEdge*, Value*> - EdgeBufferMap, Value* isLastInputBuffer, Value* graphID, - Instruction* IB) { - DEBUG(errs() << "Starting Pipeline for child node: " << C->getFuncPointer()->getName() << "\n"); +void CGT_X86::startNodeThread(DFNode *C, std::vector<Value *> Args, + DenseMap<DFEdge *, Value *> EdgeBufferMap, + Value *isLastInputBuffer, Value *graphID, + Instruction *IB) { + DEBUG(errs() << "Starting Pipeline for child node: " + << C->getFuncPointer()->getName() << "\n"); // Create a filter/pipeline function for the child node - Function* C_Pipeline = createFunctionFilter(C); - Function* CF = C->getFuncPointer(); + Function *C_Pipeline = createFunctionFilter(C); + Function *CF = C->getFuncPointer(); // Get module context and i32 0 constant, as they would be frequently used in // this function. - LLVMContext& Ctx = IB->getParent()->getContext(); - Constant* IntZero = ConstantInt::get(Type::getInt32Ty(Ctx), 0); + LLVMContext &Ctx = IB->getParent()->getContext(); + Constant *IntZero = ConstantInt::get(Type::getInt32Ty(Ctx), 0); // Marshall arguments // Create a packed struct type with inputs of C followed by outputs and then // another i8* to indicate isLastInput buffer. 
Streaming inputs are replaced // by i8* // - StructType* STy = getArgumentListStructTy(C); + StructType *STy = getArgumentListStructTy(C); // Allocate the struct on heap *NOT* stack and bitcast i8* to STy* - CallInst* CI = CallInst::Create(malloc, ArrayRef<Value*>(ConstantExpr::getSizeOf(STy)), - C->getFuncPointer()->getName()+".inputs", IB); - CastInst* Struct = BitCastInst::CreatePointerCast(CI, STy->getPointerTo(), CI->getName()+".i8ptr", IB); - //AllocaInst* AI = new AllocaInst(STy, C->getFuncPointer()->getName()+".inputs", IB); + CallInst *CI = + CallInst::Create(malloc, ArrayRef<Value *>(ConstantExpr::getSizeOf(STy)), + C->getFuncPointer()->getName() + ".inputs", IB); + CastInst *Struct = BitCastInst::CreatePointerCast( + CI, STy->getPointerTo(), CI->getName() + ".i8ptr", IB); + // AllocaInst* AI = new AllocaInst(STy, + // C->getFuncPointer()->getName()+".inputs", IB); // Insert elements in the struct - DEBUG(errs() << "Marshall inputs for child node: " << C->getFuncPointer()->getName() << "\n"); + DEBUG(errs() << "Marshall inputs for child node: " + << C->getFuncPointer()->getName() << "\n"); // Marshall Inputs - for(unsigned i=0; i < CF->getFunctionType()->getNumParams(); i++) { + for (unsigned i = 0; i < CF->getFunctionType()->getNumParams(); i++) { // Create constant int (i) - Constant* Int_i = ConstantInt::get(Type::getInt32Ty(Ctx), i); + Constant *Int_i = ConstantInt::get(Type::getInt32Ty(Ctx), i); // Get Element pointer instruction - Value* GEPIndices[] = { IntZero, Int_i }; - GetElementPtrInst* GEP = GetElementPtrInst::Create(nullptr, Struct, - ArrayRef<Value*>(GEPIndices, 2), - Struct->getName()+".arg_"+Twine(i), - IB); - DFEdge* E = C->getInDFEdgeAt(i); + Value *GEPIndices[] = {IntZero, Int_i}; + GetElementPtrInst *GEP = GetElementPtrInst::Create( + nullptr, Struct, ArrayRef<Value *>(GEPIndices, 2), + Struct->getName() + ".arg_" + Twine(i), IB); + DFEdge *E = C->getInDFEdgeAt(i); if (E->getSourceDF()->isEntryNode()) { // This is a Bind Input Edge - 
if(E->isStreamingEdge()) { + if (E->isStreamingEdge()) { // Streaming Bind Input edge. Get buffer corresponding to it - assert(EdgeBufferMap.count(E) && "No mapping buffer for a Streaming Bind DFEdge!"); + assert(EdgeBufferMap.count(E) && + "No mapping buffer for a Streaming Bind DFEdge!"); new StoreInst(EdgeBufferMap[E], GEP, IB); - } - else { + } else { // Non-streaming Bind edge new StoreInst(Args[i], GEP, IB); } - } - else { - // This is an edge between siblings. + } else { + // This is an edge between siblings. // This must be an streaming edge. As it is our assumption that all edges // between two nodes in a DFG are streaming. - assert(EdgeBufferMap.count(E) && "No mapping buffer for a Streaming DFEdge!"); + assert(EdgeBufferMap.count(E) && + "No mapping buffer for a Streaming DFEdge!"); new StoreInst(EdgeBufferMap[E], GEP, IB); } } unsigned numInputs = CF->getFunctionType()->getNumParams(); unsigned numOutputs = cast<StructType>(CF->getReturnType())->getNumElements(); // Marshall Outputs - DEBUG(errs() << "Marshall outputs for child node: " << C->getFuncPointer()->getName() << "\n"); - for(unsigned i = 0; i < numOutputs; i++ ) { + DEBUG(errs() << "Marshall outputs for child node: " + << C->getFuncPointer()->getName() << "\n"); + for (unsigned i = 0; i < numOutputs; i++) { // Create constant int (i+numInputs) - Constant* Int_i = ConstantInt::get(Type::getInt32Ty(Ctx), i+numInputs); + Constant *Int_i = ConstantInt::get(Type::getInt32Ty(Ctx), i + numInputs); // Get Element pointer instruction - Value* GEPIndices[] = { IntZero, Int_i }; - GetElementPtrInst* GEP = GetElementPtrInst::Create(nullptr, Struct, - ArrayRef<Value*>(GEPIndices, 2), - Struct->getName()+".out_"+Twine(i), - IB); - DFEdge* E = C->getOutDFEdgeAt(i); - assert(E->isStreamingEdge() && "Output Edge must be streaming of all nodes"); - assert(EdgeBufferMap.count(E) && "No mapping buffer for a Out Streaming DFEdge!"); + Value *GEPIndices[] = {IntZero, Int_i}; + GetElementPtrInst *GEP = 
GetElementPtrInst::Create( + nullptr, Struct, ArrayRef<Value *>(GEPIndices, 2), + Struct->getName() + ".out_" + Twine(i), IB); + DFEdge *E = C->getOutDFEdgeAt(i); + assert(E->isStreamingEdge() && + "Output Edge must be streaming of all nodes"); + assert(EdgeBufferMap.count(E) && + "No mapping buffer for a Out Streaming DFEdge!"); new StoreInst(EdgeBufferMap[E], GEP, IB); } // Marshall last argument. isLastInput buffer - DEBUG(errs() << "Marshall isLastInput for child node: " << C->getFuncPointer()->getName() << "\n"); + DEBUG(errs() << "Marshall isLastInput for child node: " + << C->getFuncPointer()->getName() << "\n"); // Create constant int (i+numInputs) - Constant* Int_index = ConstantInt::get(Type::getInt32Ty(Ctx), numInputs+numOutputs); + Constant *Int_index = + ConstantInt::get(Type::getInt32Ty(Ctx), numInputs + numOutputs); // Get Element pointer instruction - Value* GEPIndices[] = { IntZero, Int_index }; - GetElementPtrInst* GEP = GetElementPtrInst::Create(nullptr, Struct, - ArrayRef<Value*>(GEPIndices, 2), - Struct->getName()+".isLastInput", IB); + Value *GEPIndices[] = {IntZero, Int_index}; + GetElementPtrInst *GEP = GetElementPtrInst::Create( + nullptr, Struct, ArrayRef<Value *>(GEPIndices, 2), + Struct->getName() + ".isLastInput", IB); new StoreInst(isLastInputBuffer, GEP, IB); // AllocaInst AI points to memory with all the arguments packed // Call runtime to create the thread with these arguments - DEBUG(errs() << "Start Thread for child node: " << C->getFuncPointer()->getName() << "\n"); -// DEBUG(errs() << *llvm_visc_createThread << "\n"); + DEBUG(errs() << "Start Thread for child node: " + << C->getFuncPointer()->getName() << "\n"); + // DEBUG(errs() << *llvm_hpvm_createThread << "\n"); DEBUG(errs() << *graphID->getType() << "\n"); DEBUG(errs() << *C_Pipeline->getType() << "\n"); DEBUG(errs() << *Struct->getType() << "\n"); // Bitcast AI to i8* - CastInst* BI = BitCastInst::CreatePointerCast(Struct, Type::getInt8PtrTy(Ctx), Struct->getName(), IB); - 
Value* CreateThreadArgs[] = {graphID, C_Pipeline, BI}; - CallInst::Create(llvm_visc_createThread, ArrayRef<Value*>(CreateThreadArgs, 3), "", IB); - + CastInst *BI = BitCastInst::CreatePointerCast(Struct, Type::getInt8PtrTy(Ctx), + Struct->getName(), IB); + Value *CreateThreadArgs[] = {graphID, C_Pipeline, BI}; + CallInst::Create(llvm_hpvm_createThread, + ArrayRef<Value *>(CreateThreadArgs, 3), "", IB); } -Function* CGT_X86::createLaunchFunction(DFInternalNode* N) { +Function *CGT_X86::createLaunchFunction(DFInternalNode *N) { DEBUG(errs() << "Generating Streaming Launch Function\n"); // Get Function associated with Node N - Function* NF = N->getFuncPointer(); + Function *NF = N->getFuncPointer(); - // Map from Streaming edge to buffer - DenseMap<DFEdge*, Value*> EdgeBufferMap; + // Map from Streaming edge to buffer + DenseMap<DFEdge *, Value *> EdgeBufferMap; /* Now we have all the necessary global declarations necessary to generate the - * Launch function, pointer to which can be passed to pthread utils to execute - * DFG. The Launch function has just one input: i8* data.addr - * This is the address of the all the input data that needs to be passed to - * this function. In our case it contains the input arguments of the Root - * function in the correct order. - * (1) Create an empty Launch function of type void (i8* args, i8* GraphID) - * (2) Extract each of inputs from data.addr - * (3) create Buffers for all the streaming edges - * - Put buffers in the context - * (4) Go over each child node - * - marshall its arguments together (use buffers in place of streaming - * arguments) - * - Start the threads - * (5) The return value from Root is stored in memory, pointer to which is - * passed to pthread_exit call. - */ + * Launch function, pointer to which can be passed to pthread utils to execute + * DFG. The Launch function has just one input: i8* data.addr + * This is the address of the all the input data that needs to be passed to + * this function. 
In our case it contains the input arguments of the Root + * function in the correct order. + * (1) Create an empty Launch function of type void (i8* args, i8* GraphID) + * (2) Extract each of inputs from data.addr + * (3) create Buffers for all the streaming edges + * - Put buffers in the context + * (4) Go over each child node + * - marshall its arguments together (use buffers in place of streaming + * arguments) + * - Start the threads + * (5) The return value from Root is stored in memory, pointer to which is + * passed to pthread_exit call. + */ // (1) Create Launch Function of type void (i8* args, i8* GraphID) - Type* i8Ty = Type::getInt8Ty(M.getContext()); - Type* ArgTypes[] = {i8Ty->getPointerTo(), i8Ty->getPointerTo()}; - FunctionType* LaunchFuncTy = FunctionType::get(Type::getVoidTy(NF->getContext()), - ArrayRef<Type*>(ArgTypes, 2), false); - Function* LaunchFunc = Function::Create(LaunchFuncTy, - NF->getLinkage(), - NF->getName()+".LaunchFunction", - &M); + Type *i8Ty = Type::getInt8Ty(M.getContext()); + Type *ArgTypes[] = {i8Ty->getPointerTo(), i8Ty->getPointerTo()}; + FunctionType *LaunchFuncTy = FunctionType::get( + Type::getVoidTy(NF->getContext()), ArrayRef<Type *>(ArgTypes, 2), false); + Function *LaunchFunc = Function::Create( + LaunchFuncTy, NF->getLinkage(), NF->getName() + ".LaunchFunction", &M); DEBUG(errs() << "Generating Code for Streaming Launch Function\n"); // Give a name to the argument which is used pass data to this thread - Argument* data = &*LaunchFunc->arg_begin(); + Argument *data = &*LaunchFunc->arg_begin(); // NOTE-HS: Check correctness with Maria - Argument* graphID = &*(LaunchFunc->arg_begin() + 1); + Argument *graphID = &*(LaunchFunc->arg_begin() + 1); data->setName("data.addr"); graphID->setName("graphID"); // Add a basic block to this empty function and a return null statement to it DEBUG(errs() << *LaunchFunc->getReturnType() << "\n"); - BasicBlock *BB = BasicBlock::Create(LaunchFunc->getContext(), "entry", LaunchFunc); - 
ReturnInst* RI = ReturnInst::Create(LaunchFunc->getContext(), - BB); + BasicBlock *BB = + BasicBlock::Create(LaunchFunc->getContext(), "entry", LaunchFunc); + ReturnInst *RI = ReturnInst::Create(LaunchFunc->getContext(), BB); DEBUG(errs() << "Created Empty Launch Function\n"); // (2) Extract each of inputs from data.addr - std::vector<Type*> TyList; + std::vector<Type *> TyList; std::vector<std::string> names; - std::vector<Value*> Args; + std::vector<Value *> Args; for (Function::arg_iterator ai = NF->arg_begin(), ae = NF->arg_end(); - ai != ae; ++ai) { - if(N->getChildGraph()->getEntry()->getOutDFEdgeAt(ai->getArgNo())->isStreamingEdge()) { + ai != ae; ++ai) { + if (N->getChildGraph() + ->getEntry() + ->getOutDFEdgeAt(ai->getArgNo()) + ->isStreamingEdge()) { TyList.push_back(i8Ty->getPointerTo()); - names.push_back(Twine(ai->getName()+"_buffer").str()); + names.push_back(Twine(ai->getName() + "_buffer").str()); continue; } TyList.push_back(ai->getType()); names.push_back(ai->getName()); } Args = extractElements(data, TyList, names, RI); - DEBUG(errs() << "Launch function for " << NF->getName() << *LaunchFunc << "\n"); + DEBUG(errs() << "Launch function for " << NF->getName() << *LaunchFunc + << "\n"); // (3) Create buffers for all the streaming edges - for(DFGraph::dfedge_iterator di = N->getChildGraph()->dfedge_begin(), - de = N->getChildGraph()->dfedge_end(); di != de; ++di) { - DFEdge* Edge = *di; + for (DFGraph::dfedge_iterator di = N->getChildGraph()->dfedge_begin(), + de = N->getChildGraph()->dfedge_end(); + di != de; ++di) { + DFEdge *Edge = *di; DEBUG(errs() << *Edge->getType() << "\n"); - Value* size = ConstantExpr::getSizeOf(Edge->getType()); - Value* CallArgs[] = {graphID, size}; + Value *size = ConstantExpr::getSizeOf(Edge->getType()); + Value *CallArgs[] = {graphID, size}; if (Edge->isStreamingEdge()) { - CallInst* CI; + CallInst *CI; // Create a buffer call - if(Edge->getSourceDF()->isEntryNode()) { + if (Edge->getSourceDF()->isEntryNode()) { // 
Bind Input Edge - Constant* Int_ArgNo = ConstantInt::get(Type::getInt32Ty(RI->getContext()), - Edge->getSourcePosition()); - Value* BindInCallArgs[] = {graphID, size, Int_ArgNo}; - CI = CallInst::Create(llvm_visc_createBindInBuffer, ArrayRef<Value*>(BindInCallArgs, 3), - "BindIn."+Edge->getDestDF()->getFuncPointer()->getName(), - RI); - } - else if(Edge->getDestDF()->isExitNode()) { + Constant *Int_ArgNo = ConstantInt::get( + Type::getInt32Ty(RI->getContext()), Edge->getSourcePosition()); + Value *BindInCallArgs[] = {graphID, size, Int_ArgNo}; + CI = CallInst::Create( + llvm_hpvm_createBindInBuffer, ArrayRef<Value *>(BindInCallArgs, 3), + "BindIn." + Edge->getDestDF()->getFuncPointer()->getName(), RI); + } else if (Edge->getDestDF()->isExitNode()) { // Bind Output Edge - CI = CallInst::Create(llvm_visc_createBindOutBuffer, ArrayRef<Value*>(CallArgs, 2), - "BindOut."+Edge->getSourceDF()->getFuncPointer()->getName(), - RI); - } - else { + CI = CallInst::Create( + llvm_hpvm_createBindOutBuffer, ArrayRef<Value *>(CallArgs, 2), + "BindOut." + Edge->getSourceDF()->getFuncPointer()->getName(), RI); + } else { // Streaming Edge - CI = CallInst::Create(llvm_visc_createEdgeBuffer, - ArrayRef<Value*>(CallArgs, 2), - Edge->getSourceDF()->getFuncPointer()->getName()+"." - +Edge->getDestDF()->getFuncPointer()->getName(), - RI); + CI = CallInst::Create( + llvm_hpvm_createEdgeBuffer, ArrayRef<Value *>(CallArgs, 2), + Edge->getSourceDF()->getFuncPointer()->getName() + "." 
+ + Edge->getDestDF()->getFuncPointer()->getName(), + RI); } EdgeBufferMap[Edge] = CI; } } // Create buffer for isLastInput for all the child nodes - DFGraph* G = N->getChildGraph(); - DenseMap<DFNode*, Value*> NodeLastInputMap; - for(DFGraph::children_iterator ci = G->begin(), ce = G->end(); ci != ce; ++ci) { - DFNode* child = *ci; - if(child->isDummyNode()) + DFGraph *G = N->getChildGraph(); + DenseMap<DFNode *, Value *> NodeLastInputMap; + for (DFGraph::children_iterator ci = G->begin(), ce = G->end(); ci != ce; + ++ci) { + DFNode *child = *ci; + if (child->isDummyNode()) continue; - Value* size = ConstantExpr::getSizeOf(Type::getInt64Ty(NF->getContext())); - Value* CallArgs[] = {graphID, size}; - CallInst* CI = CallInst::Create(llvm_visc_createLastInputBuffer, ArrayRef<Value*>(CallArgs, 2), - "BindIn.isLastInput."+child->getFuncPointer()->getName(), - RI); + Value *size = ConstantExpr::getSizeOf(Type::getInt64Ty(NF->getContext())); + Value *CallArgs[] = {graphID, size}; + CallInst *CI = CallInst::Create( + llvm_hpvm_createLastInputBuffer, ArrayRef<Value *>(CallArgs, 2), + "BindIn.isLastInput." 
+ child->getFuncPointer()->getName(), RI); NodeLastInputMap[child] = CI; } - DEBUG(errs() << "Start Each child node filter\n"); + DEBUG(errs() << "Start Each child node filter\n"); // (4) Marshall arguments for each child node and start the thread with its // pipeline funtion - for(DFGraph::children_iterator ci = N->getChildGraph()->begin(), - ce = N->getChildGraph()->end(); ci != ce; ++ci) { - DFNode* C = *ci; + for (DFGraph::children_iterator ci = N->getChildGraph()->begin(), + ce = N->getChildGraph()->end(); + ci != ce; ++ci) { + DFNode *C = *ci; // Skip dummy node call if (C->isDummyNode()) continue; - + // Marshall all the arguments for this node into an i8* // Pass to the runtime to create the thread // Start the thread for child node C @@ -617,7 +633,6 @@ Function* CGT_X86::createLaunchFunction(DFInternalNode* N) { return LaunchFunc; } - /* This fuction does the steps necessary to launch a streaming graph * Steps * Create Pipeline/Filter function for each node in child graph of Root @@ -625,167 +640,158 @@ Function* CGT_X86::createLaunchFunction(DFInternalNode* N) { * Modify each of the instrinsic in host code * Launch, Push, Pop, Wait */ -void CGT_X86::codeGenLaunchStreaming(DFInternalNode* Root) { - IntrinsicInst* LI = Root->getInstruction(); - Function* RootLaunch = createLaunchFunction(Root); +void CGT_X86::codeGenLaunchStreaming(DFInternalNode *Root) { + IntrinsicInst *LI = Root->getInstruction(); + Function *RootLaunch = createLaunchFunction(Root); // Substitute launch intrinsic main - DEBUG(errs() << "Substitute launch intrinsic\n"); - Value* LaunchInstArgs[] = {RootLaunch, - LI->getArgOperand(1) - }; - CallInst* LaunchInst = CallInst::Create(llvm_visc_streamLaunch, - ArrayRef<Value*>(LaunchInstArgs,2), - "graph"+Root->getFuncPointer()->getName(), LI); + DEBUG(errs() << "Substitute launch intrinsic\n"); + Value *LaunchInstArgs[] = {RootLaunch, LI->getArgOperand(1)}; + CallInst *LaunchInst = CallInst::Create( + llvm_hpvm_streamLaunch, ArrayRef<Value 
*>(LaunchInstArgs, 2), + "graph" + Root->getFuncPointer()->getName(), LI); DEBUG(errs() << *LaunchInst << "\n"); // Replace all wait instructions with x86 specific wait instructions - DEBUG(errs() << "Substitute wait, push, pop intrinsics\n"); - std::vector<IntrinsicInst*>* UseList = getUseList(LI); - for(unsigned i=0; i < UseList->size(); ++i) { - IntrinsicInst* II = UseList->at(i); - CallInst* CI; - Value* PushArgs[] = {LaunchInst, II->getOperand(1)}; - switch(II->getIntrinsicID()) { - case Intrinsic::visc_wait: - CI = CallInst::Create(llvm_visc_streamWait, - ArrayRef<Value*>(LaunchInst), + DEBUG(errs() << "Substitute wait, push, pop intrinsics\n"); + std::vector<IntrinsicInst *> *UseList = getUseList(LI); + for (unsigned i = 0; i < UseList->size(); ++i) { + IntrinsicInst *II = UseList->at(i); + CallInst *CI; + Value *PushArgs[] = {LaunchInst, II->getOperand(1)}; + switch (II->getIntrinsicID()) { + case Intrinsic::hpvm_wait: + CI = CallInst::Create(llvm_hpvm_streamWait, ArrayRef<Value *>(LaunchInst), ""); break; - case Intrinsic::visc_push: - CI = CallInst::Create(llvm_visc_streamPush, - ArrayRef<Value*>(PushArgs, 2), - ""); + case Intrinsic::hpvm_push: + CI = CallInst::Create(llvm_hpvm_streamPush, + ArrayRef<Value *>(PushArgs, 2), ""); break; - case Intrinsic::visc_pop: - CI = CallInst::Create(llvm_visc_streamPop, - ArrayRef<Value*>(LaunchInst), + case Intrinsic::hpvm_pop: + CI = CallInst::Create(llvm_hpvm_streamPop, ArrayRef<Value *>(LaunchInst), ""); break; default: - llvm_unreachable("GraphID is used by an instruction other than wait, push, pop"); + llvm_unreachable( + "GraphID is used by an instruction other than wait, push, pop"); }; DEBUG(errs() << "Replace:\n\t" << *II << "\n"); ReplaceInstWithInst(II, CI); DEBUG(errs() << "\twith " << *CI << "\n"); } - - } -void CGT_X86::codeGenLaunch(DFInternalNode* Root) { +void CGT_X86::codeGenLaunch(DFInternalNode *Root) { // TODO: Place an assert to check if the constant passed by launch intrinsic // as the number 
of arguments to DFG is same as the number of arguments of the // root of DFG DEBUG(errs() << "Generating Launch Function\n"); // Get Launch Instruction - IntrinsicInst* LI = Root->getInstruction(); - switchToTimer(visc_TimerID_PTHREAD_CREATE, LI); + IntrinsicInst *LI = Root->getInstruction(); + switchToTimer(hpvm_TimerID_PTHREAD_CREATE, LI); DEBUG(errs() << "Generating Launch Function\n"); /* Now we have all the necessary global declarations necessary to generate the - * Launch function, pointer to which can be passed to pthread utils to execute - * DFG. The Launch function has just one input: i8* data.addr - * This is the address of the all the input data that needs to be passed to - * this function. In our case it contains the input arguments of the Root - * function in the correct order. - * (1) Create an empty Launch function of type i8*(i8*) - * (2) Extract each of inputs from data.addr and pass them as arguments to the - * call to Root function - * (3) The return value from Root is stored in memory, pointer to which is - * passed to pthread_exit call. - */ + * Launch function, pointer to which can be passed to pthread utils to execute + * DFG. The Launch function has just one input: i8* data.addr + * This is the address of the all the input data that needs to be passed to + * this function. In our case it contains the input arguments of the Root + * function in the correct order. + * (1) Create an empty Launch function of type i8*(i8*) + * (2) Extract each of inputs from data.addr and pass them as arguments to the + * call to Root function + * (3) The return value from Root is stored in memory, pointer to which is + * passed to pthread_exit call. 
+ */ // Create Launch Function of type i8*(i8*) which calls the root function - Type* i8Ty = Type::getInt8Ty(M.getContext()); - FunctionType* AppFuncTy = FunctionType::get(i8Ty->getPointerTo(), - ArrayRef<Type*>(i8Ty->getPointerTo()), - false); - Function* AppFunc = Function::Create(AppFuncTy, - Root->getFuncPointer()->getLinkage(), - "LaunchDataflowGraph", - &M); + Type *i8Ty = Type::getInt8Ty(M.getContext()); + FunctionType *AppFuncTy = FunctionType::get( + i8Ty->getPointerTo(), ArrayRef<Type *>(i8Ty->getPointerTo()), false); + Function *AppFunc = + Function::Create(AppFuncTy, Root->getFuncPointer()->getLinkage(), + "LaunchDataflowGraph", &M); DEBUG(errs() << "Generating Launch Function\n"); // Give a name to the argument which is used pass data to this thread - Value* data = &*AppFunc->arg_begin(); + Value *data = &*AppFunc->arg_begin(); data->setName("data.addr"); // Add a basic block to this empty function and a return null statement to it BasicBlock *BB = BasicBlock::Create(AppFunc->getContext(), "entry", AppFunc); - ReturnInst* RI = ReturnInst::Create(AppFunc->getContext(), - Constant::getNullValue(AppFunc->getReturnType()), - BB); - switchToTimer(visc_TimerID_ARG_UNPACK, RI); + ReturnInst *RI = + ReturnInst::Create(AppFunc->getContext(), + Constant::getNullValue(AppFunc->getReturnType()), BB); + switchToTimer(hpvm_TimerID_ARG_UNPACK, RI); DEBUG(errs() << "Created Empty Launch Function\n"); // Find the X86 function generated for Root and -// Function* RootF_X86 = Root->getGenFunc(); - Function* RootF_X86 = Root->getGenFuncForTarget(visc::CPU_TARGET); + // Function* RootF_X86 = Root->getGenFunc(); + Function *RootF_X86 = Root->getGenFuncForTarget(hpvm::CPU_TARGET); assert(RootF_X86 && "Error: No generated CPU function for Root node\n"); - assert(Root->hasX86GenFuncForTarget(visc::CPU_TARGET) && + assert(Root->hasX86GenFuncForTarget(hpvm::CPU_TARGET) && "Error: Generated Function for Root node with no x86 wrapper\n"); // Generate a call to RootF_X86 with null 
parameters for now - std::vector<Value*>Args; - for(unsigned i=0; i< RootF_X86->getFunctionType()->getNumParams(); i++) { - Args.push_back(Constant::getNullValue(RootF_X86->getFunctionType()->getParamType(i))); + std::vector<Value *> Args; + for (unsigned i = 0; i < RootF_X86->getFunctionType()->getNumParams(); i++) { + Args.push_back( + Constant::getNullValue(RootF_X86->getFunctionType()->getParamType(i))); } - CallInst* CI = CallInst::Create(RootF_X86, Args, RootF_X86->getName()+".output", RI); + CallInst *CI = + CallInst::Create(RootF_X86, Args, RootF_X86->getName() + ".output", RI); // Extract input data from i8* data.addr and patch them to correct argument of // call to RootF_X86. For each argument - std::vector<Type*> TyList; + std::vector<Type *> TyList; std::vector<std::string> names; - for(Function::arg_iterator ai = RootF_X86->arg_begin(), ae = RootF_X86->arg_end(); - ai != ae; ++ai) { + for (Function::arg_iterator ai = RootF_X86->arg_begin(), + ae = RootF_X86->arg_end(); + ai != ae; ++ai) { TyList.push_back(ai->getType()); names.push_back(ai->getName()); } - std::vector<Value*> elements = extractElements(data, TyList, names, CI); + std::vector<Value *> elements = extractElements(data, TyList, names, CI); // Patch the elements to the call arguments - for(unsigned i=0; i<CI->getNumArgOperands(); i++) + for (unsigned i = 0; i < CI->getNumArgOperands(); i++) CI->setArgOperand(i, elements[i]); // Add timers around Call to RootF_X86 function - switchToTimer(visc_TimerID_COMPUTATION, CI); - switchToTimer(visc_TimerID_OUTPUT_PACK, RI); + switchToTimer(hpvm_TimerID_COMPUTATION, CI); + switchToTimer(hpvm_TimerID_OUTPUT_PACK, RI); - StructType *RootRetTy = cast<StructType>(RootF_X86->getFunctionType()->getReturnType()); + StructType *RootRetTy = + cast<StructType>(RootF_X86->getFunctionType()->getReturnType()); - // if Root has non empty return + // if Root has non empty return if (RootRetTy->getNumElements()) { // We can't access the type of the arg struct - build 
it - std::vector<Type*> TyList; - for(Function::arg_iterator ai = RootF_X86->arg_begin(), ae = RootF_X86->arg_end(); - ai != ae; ++ai) { + std::vector<Type *> TyList; + for (Function::arg_iterator ai = RootF_X86->arg_begin(), + ae = RootF_X86->arg_end(); + ai != ae; ++ai) { TyList.push_back(ai->getType()); } TyList.push_back(CI->getType()); - StructType* ArgStructTy = StructType::create(M.getContext(), - ArrayRef<Type*>(TyList), - (RootF_X86->getName()+".arg.struct.ty").str(), true); + StructType *ArgStructTy = StructType::create( + M.getContext(), ArrayRef<Type *>(TyList), + (RootF_X86->getName() + ".arg.struct.ty").str(), true); // Cast the data pointer to the type of the arg struct - CastInst* OutputAddrCast = CastInst::CreatePointerCast(data, - ArgStructTy->getPointerTo(), - "argStructCast.addr", - RI); + CastInst *OutputAddrCast = CastInst::CreatePointerCast( + data, ArgStructTy->getPointerTo(), "argStructCast.addr", RI); // Result struct is the last element of the packed struct passed to launch unsigned outStructIdx = ArgStructTy->getNumElements() - 1; - ConstantInt *IntZero = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0); - ConstantInt *IntIdx = ConstantInt::get(Type::getInt32Ty(M.getContext()), - outStructIdx); + ConstantInt *IntZero = + ConstantInt::get(Type::getInt32Ty(M.getContext()), 0); + ConstantInt *IntIdx = + ConstantInt::get(Type::getInt32Ty(M.getContext()), outStructIdx); - Value* GEPIIdxList[] = { IntZero, - IntIdx - }; + Value *GEPIIdxList[] = {IntZero, IntIdx}; // Get data pointer to the last element of struct - result field - GetElementPtrInst *OutGEPI = - GetElementPtrInst::Create(ArgStructTy, - OutputAddrCast, - ArrayRef<Value*>(GEPIIdxList, 2), - CI->getName()+".addr", - RI); + GetElementPtrInst *OutGEPI = GetElementPtrInst::Create( + ArgStructTy, OutputAddrCast, ArrayRef<Value *>(GEPIIdxList, 2), + CI->getName() + ".addr", RI); // Store result there new StoreInst(CI, OutGEPI, RI); } else { @@ -794,117 +800,111 @@ void 
CGT_X86::codeGenLaunch(DFInternalNode* Root) { // We were casting the data pointer to the result type of Root, and // returning result there. This would work at the LLVM level, but not // at the C level, thus the rewrite. - CastInst* OutputAddrCast = CastInst::CreatePointerCast(data, - CI->getType()->getPointerTo(), - CI->getName()+".addr", - RI); + CastInst *OutputAddrCast = CastInst::CreatePointerCast( + data, CI->getType()->getPointerTo(), CI->getName() + ".addr", RI); new StoreInst(CI, OutputAddrCast, RI); } - switchToTimer(visc_TimerID_NONE, RI); + switchToTimer(hpvm_TimerID_NONE, RI); DEBUG(errs() << "Application specific function:\n"); DEBUG(errs() << *AppFunc << "\n"); // Substitute launch intrinsic main - Value* LaunchInstArgs[] = {AppFunc, - LI->getArgOperand(1) - }; - CallInst* LaunchInst = CallInst::Create(llvm_visc_x86_launch, - ArrayRef<Value*>(LaunchInstArgs,2), - "graph"+Root->getFuncPointer()->getName(), LI); - //ReplaceInstWithInst(LI, LaunchInst); + Value *LaunchInstArgs[] = {AppFunc, LI->getArgOperand(1)}; + CallInst *LaunchInst = CallInst::Create( + llvm_hpvm_x86_launch, ArrayRef<Value *>(LaunchInstArgs, 2), + "graph" + Root->getFuncPointer()->getName(), LI); + // ReplaceInstWithInst(LI, LaunchInst); DEBUG(errs() << *LaunchInst << "\n"); // Replace all wait instructions with x86 specific wait instructions - std::vector<IntrinsicInst*>* UseList = getUseList(LI); - for(unsigned i=0; i < UseList->size(); ++i) { - IntrinsicInst* II = UseList->at(i); - CallInst* CI; - switch(II->getIntrinsicID()) { - case Intrinsic::visc_wait: - CI = CallInst::Create(llvm_visc_x86_wait, - ArrayRef<Value*>(LaunchInst), + std::vector<IntrinsicInst *> *UseList = getUseList(LI); + for (unsigned i = 0; i < UseList->size(); ++i) { + IntrinsicInst *II = UseList->at(i); + CallInst *CI; + switch (II->getIntrinsicID()) { + case Intrinsic::hpvm_wait: + CI = CallInst::Create(llvm_hpvm_x86_wait, ArrayRef<Value *>(LaunchInst), ""); break; - case Intrinsic::visc_push: - CI = 
CallInst::Create(llvm_visc_bufferPush, - ArrayRef<Value*>(LaunchInst), + case Intrinsic::hpvm_push: + CI = CallInst::Create(llvm_hpvm_bufferPush, ArrayRef<Value *>(LaunchInst), ""); break; - case Intrinsic::visc_pop: - CI = CallInst::Create(llvm_visc_bufferPop, - ArrayRef<Value*>(LaunchInst), + case Intrinsic::hpvm_pop: + CI = CallInst::Create(llvm_hpvm_bufferPop, ArrayRef<Value *>(LaunchInst), ""); break; default: - llvm_unreachable("GraphID is used by an instruction other than wait, push, pop"); + llvm_unreachable( + "GraphID is used by an instruction other than wait, push, pop"); }; ReplaceInstWithInst(II, CI); DEBUG(errs() << *CI << "\n"); } - } -Value* CGT_X86::getInValueAt(DFNode* Child, unsigned i, Function* ParentF_X86, Instruction* InsertBefore) { +Value *CGT_X86::getInValueAt(DFNode *Child, unsigned i, Function *ParentF_X86, + Instruction *InsertBefore) { // TODO: Assumption is that each input port of a node has just one // incoming edge. May change later on. // Find the incoming edge at the requested input port - DFEdge* E = Child->getInDFEdgeAt(i); + DFEdge *E = Child->getInDFEdgeAt(i); assert(E && "No incoming edge or binding for input element!"); // Find the Source DFNode associated with the incoming edge - DFNode* SrcDF = E->getSourceDF(); + DFNode *SrcDF = E->getSourceDF(); // If Source DFNode is a dummyNode, edge is from parent. Get the // argument from argument list of this internal node - Value* inputVal; - if(SrcDF->isEntryNode()) { + Value *inputVal; + if (SrcDF->isEntryNode()) { inputVal = getArgumentAt(ParentF_X86, E->getSourcePosition()); - DEBUG(errs() << "Argument "<< i<< " = " << *inputVal << "\n"); - } - else { + DEBUG(errs() << "Argument " << i << " = " << *inputVal << "\n"); + } else { // edge is from a sibling // Check - code should already be generated for this source dfnode - assert(OutputMap.count(SrcDF) - && "Source node call not found. Dependency violation!"); + assert(OutputMap.count(SrcDF) && + "Source node call not found. 
Dependency violation!"); // Find CallInst associated with the Source DFNode using OutputMap - Value* CI = OutputMap[SrcDF]; + Value *CI = OutputMap[SrcDF]; // Extract element at source position from this call instruction std::vector<unsigned> IndexList; IndexList.push_back(E->getSourcePosition()); - DEBUG(errs() << "Going to generate ExtarctVal inst from "<< *CI <<"\n"); - ExtractValueInst* EI = ExtractValueInst::Create(CI, IndexList, - "", InsertBefore); + DEBUG(errs() << "Going to generate ExtarctVal inst from " << *CI << "\n"); + ExtractValueInst *EI = + ExtractValueInst::Create(CI, IndexList, "", InsertBefore); inputVal = EI; } return inputVal; } -void CGT_X86::invokeChild_X86(DFNode* C, Function* F_X86, - ValueToValueMapTy &VMap,Instruction* IB) { - Function* CF = C->getFuncPointer(); - -// Function* CF_X86 = C->getGenFunc(); - Function *CF_X86 = C->getGenFuncForTarget(visc::CPU_TARGET); - assert(CF_X86 != NULL - && "Found leaf node for which code generation has not happened yet!\n"); - assert(C->hasX86GenFuncForTarget(visc::CPU_TARGET) && - "The generated function to be called from x86 backend is not an x86 function\n"); +void CGT_X86::invokeChild_X86(DFNode *C, Function *F_X86, + ValueToValueMapTy &VMap, Instruction *IB) { + Function *CF = C->getFuncPointer(); + + // Function* CF_X86 = C->getGenFunc(); + Function *CF_X86 = C->getGenFuncForTarget(hpvm::CPU_TARGET); + assert(CF_X86 != NULL && + "Found leaf node for which code generation has not happened yet!\n"); + assert(C->hasX86GenFuncForTarget(hpvm::CPU_TARGET) && + "The generated function to be called from x86 backend is not an x86 " + "function\n"); DEBUG(errs() << "Invoking child node" << CF_X86->getName() << "\n"); - std::vector<Value*> Args; + std::vector<Value *> Args; // Create argument list to pass to call instruction // First find the correct values using the edges // The remaing six values are inserted as constants for now. 
- for(unsigned i=0; i<CF->getFunctionType()->getNumParams(); i++) { + for (unsigned i = 0; i < CF->getFunctionType()->getNumParams(); i++) { Args.push_back(getInValueAt(C, i, F_X86, IB)); } - Value* I64Zero = ConstantInt::get(Type::getInt64Ty(F_X86->getContext()), 0); - for(unsigned j=0; j<6; j++) + Value *I64Zero = ConstantInt::get(Type::getInt64Ty(F_X86->getContext()), 0); + for (unsigned j = 0; j < 6; j++) Args.push_back(I64Zero); errs() << "Gen Function type: " << *CF_X86->getType() << "\n"; @@ -912,9 +912,8 @@ void CGT_X86::invokeChild_X86(DFNode* C, Function* F_X86, errs() << "Arguments: " << Args.size() << "\n"; // Call the F_X86 function associated with this node - CallInst* CI = CallInst::Create(CF_X86, Args, - CF_X86->getName()+"_output", - IB); + CallInst *CI = + CallInst::Create(CF_X86, Args, CF_X86->getName() + "_output", IB); DEBUG(errs() << *CI << "\n"); OutputMap[C] = CI; @@ -922,55 +921,56 @@ void CGT_X86::invokeChild_X86(DFNode* C, Function* F_X86, // Based on number of dimensions, insert loop instructions std::string varNames[3] = {"x", "y", "z"}; unsigned numArgs = CI->getNumArgOperands(); - for(unsigned j=0; j < C->getNumOfDim(); j++) { - Value* indexLimit = NULL; + for (unsigned j = 0; j < C->getNumOfDim(); j++) { + Value *indexLimit = NULL; // Limit can either be a constant or an arguement of the internal node. // In case of constant we can use that constant value directly in the // new F_X86 function. 
In case of an argument, we need to get the mapped // value using VMap - if(isa<Constant>(C->getDimLimits()[j])) { + if (isa<Constant>(C->getDimLimits()[j])) { indexLimit = C->getDimLimits()[j]; DEBUG(errs() << "In Constant case:\n" - << " indexLimit type = " << *indexLimit->getType() << "\n"); - } - else { + << " indexLimit type = " << *indexLimit->getType() << "\n"); + } else { indexLimit = VMap[C->getDimLimits()[j]]; DEBUG(errs() << "In VMap case:" - <<" indexLimit type = " << *indexLimit->getType() << "\n"); + << " indexLimit type = " << *indexLimit->getType() << "\n"); } assert(indexLimit && "Invalid dimension limit!"); // Insert loop - Value* indexVar = addLoop(CI, indexLimit, varNames[j]); + Value *indexVar = addLoop(CI, indexLimit, varNames[j]); DEBUG(errs() << "indexVar type = " << *indexVar->getType() << "\n"); // Insert index variable and limit arguments - CI->setArgOperand(numArgs-6+j, indexVar); - CI->setArgOperand(numArgs-3+j, indexLimit); + CI->setArgOperand(numArgs - 6 + j, indexVar); + CI->setArgOperand(numArgs - 3 + j, indexLimit); } // Insert call to runtime to push the dim limits and instanceID on the depth // stack - Value* args[] = { - ConstantInt::get(Type::getInt32Ty(CI->getContext()), C->getNumOfDim()), // numDim - CI->getArgOperand(numArgs-3+0), // limitX - CI->getArgOperand(numArgs-6+0), // iX - CI->getArgOperand(numArgs-3+1), // limitY - CI->getArgOperand(numArgs-6+1), // iY - CI->getArgOperand(numArgs-3+2), // limitZ - CI->getArgOperand(numArgs-6+2) // iZ + Value *args[] = { + ConstantInt::get(Type::getInt32Ty(CI->getContext()), + C->getNumOfDim()), // numDim + CI->getArgOperand(numArgs - 3 + 0), // limitX + CI->getArgOperand(numArgs - 6 + 0), // iX + CI->getArgOperand(numArgs - 3 + 1), // limitY + CI->getArgOperand(numArgs - 6 + 1), // iY + CI->getArgOperand(numArgs - 3 + 2), // limitZ + CI->getArgOperand(numArgs - 6 + 2) // iZ }; - CallInst* Push = CallInst::Create(llvm_visc_x86_dstack_push, ArrayRef<Value*>(args, 7), "", CI); + 
CallInst *Push = CallInst::Create(llvm_hpvm_x86_dstack_push, + ArrayRef<Value *>(args, 7), "", CI); DEBUG(errs() << "Push on stack: " << *Push << "\n"); // Insert call to runtime to pop the dim limits and instanceID from the depth // stack BasicBlock::iterator i(CI); ++i; - Instruction* NextI = &*i; + Instruction *NextI = &*i; // Next Instruction should also belong to the same basic block as the basic // block will have a terminator instruction - assert(NextI->getParent() == CI->getParent() - && "Next Instruction should also belong to the same basic block!"); + assert(NextI->getParent() == CI->getParent() && + "Next Instruction should also belong to the same basic block!"); - CallInst* Pop = CallInst::Create(llvm_visc_x86_dstack_pop, None, "", NextI); + CallInst *Pop = CallInst::Create(llvm_hpvm_x86_dstack_pop, None, "", NextI); DEBUG(errs() << "Pop from stack: " << *Pop << "\n"); DEBUG(errs() << *CI->getParent()->getParent()); } @@ -991,34 +991,33 @@ void CGT_X86::invokeChild_X86(DFNode* C, Function* F_X86, // Add runtime API calls to push output for each of the streaming outputs // Add loop around the basic block, which exits the loop if isLastInput is false -Function* CGT_X86::createFunctionFilter(DFNode* C) { - DEBUG(errs() << "*********Creating Function filter for " << C->getFuncPointer()->getName() << "*****\n"); +Function *CGT_X86::createFunctionFilter(DFNode *C) { + DEBUG(errs() << "*********Creating Function filter for " + << C->getFuncPointer()->getName() << "*****\n"); /* Create a function with same argument list as child.*/ DEBUG(errs() << "\tCreate a function with the same argument list as child\n"); // Get the generated function for child node - Function* CF = C->getFuncPointer(); + Function *CF = C->getFuncPointer(); // Create Filter Function of type i8*(i8*) which calls the root function - Type* i8Ty = Type::getInt8Ty(M.getContext()); - FunctionType* CF_PipelineTy = FunctionType::get(i8Ty->getPointerTo(), - ArrayRef<Type*>(i8Ty->getPointerTo()), - 
false); - Function* CF_Pipeline = Function::Create(CF_PipelineTy, - CF->getLinkage(), - CF->getName()+"_Pipeline", - &M); + Type *i8Ty = Type::getInt8Ty(M.getContext()); + FunctionType *CF_PipelineTy = FunctionType::get( + i8Ty->getPointerTo(), ArrayRef<Type *>(i8Ty->getPointerTo()), false); + Function *CF_Pipeline = Function::Create(CF_PipelineTy, CF->getLinkage(), + CF->getName() + "_Pipeline", &M); DEBUG(errs() << "Generating Pipeline Function\n"); // Give a name to the argument which is used pass data to this thread - Value* data = &*CF_Pipeline->arg_begin(); + Value *data = &*CF_Pipeline->arg_begin(); data->setName("data.addr"); // Create a new basic block DEBUG(errs() << "\tCreate new BB and add a return function\n"); // Add a basic block to this empty function - BasicBlock *BB = BasicBlock::Create(CF_Pipeline->getContext(), "entry", CF_Pipeline); + BasicBlock *BB = + BasicBlock::Create(CF_Pipeline->getContext(), "entry", CF_Pipeline); // Add a return instruction to the basic block - ReturnInst* RI = ReturnInst::Create(CF_Pipeline->getContext(), - UndefValue::get(CF_Pipeline->getReturnType()), BB); - + ReturnInst *RI = + ReturnInst::Create(CF_Pipeline->getContext(), + UndefValue::get(CF_Pipeline->getReturnType()), BB); /* Extract the elements from the aggregate argument to the function. * Replace the streaming inputs with i8* types signifying handle to @@ -1029,25 +1028,24 @@ Function* CGT_X86::createFunctionFilter(DFNode* C) { DEBUG(errs() << "\tReplace streaming input arguments with i8* type\n"); // These Args will be used when passing arguments to the generated function // inside loop, and reading outputs as well. 
- std::vector<Value*> Args; - std::vector<Type*> TyList; + std::vector<Value *> Args; + std::vector<Type *> TyList; std::vector<std::string> names; // Adding inputs - for (Function::arg_iterator i = CF->arg_begin(), e = CF->arg_end(); - i != e; ++i) { - if(C->getInDFEdgeAt(i->getArgNo())->isStreamingEdge()) { + for (Function::arg_iterator i = CF->arg_begin(), e = CF->arg_end(); i != e; + ++i) { + if (C->getInDFEdgeAt(i->getArgNo())->isStreamingEdge()) { TyList.push_back(i8Ty->getPointerTo()); - names.push_back((Twine(i->getName())+"_buffer").str()); - } - else { + names.push_back((Twine(i->getName()) + "_buffer").str()); + } else { TyList.push_back(i->getType()); names.push_back(i->getName()); } } // Adding outputs. FIXME: Since we assume all outputs to be streaming edges, // because we get there buffer handles - StructType* RetTy = cast<StructType>(CF->getReturnType()); - for (unsigned i=0; i<RetTy->getNumElements(); i++) { + StructType *RetTy = cast<StructType>(CF->getReturnType()); + for (unsigned i = 0; i < RetTy->getNumElements(); i++) { TyList.push_back(i8Ty->getPointerTo()); names.push_back("out"); } @@ -1056,66 +1054,54 @@ Function* CGT_X86::createFunctionFilter(DFNode* C) { TyList.push_back(i8Ty->getPointerTo()); names.push_back("isLastInput_buffer"); - // Extract the inputs, outputs + // Extract the inputs, outputs Args = extractElements(data, TyList, names, RI); - for(unsigned i=0; i<Args.size(); i++) { + for (unsigned i = 0; i < Args.size(); i++) { DEBUG(errs() << *Args[i] << "\n"); } // Split the Args vector into, input output and isLastInput unsigned numInputs = CF->getFunctionType()->getNumParams(); unsigned numOutputs = RetTy->getNumElements(); - std::vector<Value*> InputArgs(Args.begin(), Args.begin() + numInputs); - std::vector<Value*> OutputArgs(Args.begin() + numInputs, Args.begin() + numInputs + numOutputs); - Instruction* isLastInput = cast<Instruction>(Args[Args.size()-1]); + std::vector<Value *> InputArgs(Args.begin(), Args.begin() + 
numInputs); + std::vector<Value *> OutputArgs(Args.begin() + numInputs, + Args.begin() + numInputs + numOutputs); + Instruction *isLastInput = cast<Instruction>(Args[Args.size() - 1]); /* Add runtime API calls to get input for each of the streaming input edges */ - DEBUG(errs() << "\tAdd runtime API calls to get input for each of the streaming input edges\n"); + DEBUG(errs() << "\tAdd runtime API calls to get input for each of the " + "streaming input edges\n"); // First read the termination condition variable islastInput - CallInst* isLastInputPop = CallInst::Create(llvm_visc_bufferPop, - ArrayRef<Value*>(isLastInput), - "", - RI); - - CastInst* BI = BitCastInst::CreateIntegerCast(isLastInputPop, - Type::getInt64Ty(CF_Pipeline->getContext()), - false, - "isLastInput", - RI); + CallInst *isLastInputPop = CallInst::Create( + llvm_hpvm_bufferPop, ArrayRef<Value *>(isLastInput), "", RI); + + CastInst *BI = BitCastInst::CreateIntegerCast( + isLastInputPop, Type::getInt64Ty(CF_Pipeline->getContext()), false, + "isLastInput", RI); isLastInput = BI; // Create a loop termination condition - CmpInst* Cond = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_NE, - isLastInput, Constant::getNullValue(Type::getInt64Ty(CF->getContext())), "isLastInputNotZero", - RI); + CmpInst *Cond = CmpInst::Create( + Instruction::ICmp, CmpInst::ICMP_NE, isLastInput, + Constant::getNullValue(Type::getInt64Ty(CF->getContext())), + "isLastInputNotZero", RI); // Get input from buffers of all the incoming streaming edges - for (Function::arg_iterator i = CF->arg_begin(), e = CF->arg_end(); - i != e; ++i) { - if(C->getInDFEdgeAt(i->getArgNo())->isStreamingEdge()) { - CallInst* bufferIn = CallInst::Create(llvm_visc_bufferPop, - ArrayRef<Value*>(InputArgs[i->getArgNo()]), - "", - RI); - CastInst* BI; - if(i->getType()->isPointerTy()) { - BI = CastInst::Create(CastInst::IntToPtr, - bufferIn, - i->getType(), - i->getName()+".addr", - RI); - } - else if(i->getType()->isFloatTy()) { - BI = 
CastInst::CreateFPCast(bufferIn, - i->getType(), - i->getName()+".addr", - RI); - } - else { - BI = CastInst::CreateIntegerCast(bufferIn, - i->getType(), - false, - i->getName()+".addr", - RI); + for (Function::arg_iterator i = CF->arg_begin(), e = CF->arg_end(); i != e; + ++i) { + if (C->getInDFEdgeAt(i->getArgNo())->isStreamingEdge()) { + CallInst *bufferIn = + CallInst::Create(llvm_hpvm_bufferPop, + ArrayRef<Value *>(InputArgs[i->getArgNo()]), "", RI); + CastInst *BI; + if (i->getType()->isPointerTy()) { + BI = CastInst::Create(CastInst::IntToPtr, bufferIn, i->getType(), + i->getName() + ".addr", RI); + } else if (i->getType()->isFloatTy()) { + BI = CastInst::CreateFPCast(bufferIn, i->getType(), + i->getName() + ".addr", RI); + } else { + BI = CastInst::CreateIntegerCast(bufferIn, i->getType(), false, + i->getName() + ".addr", RI); } // Replace the argument in Args vector. We would be using the vector as // parameters passed to the call @@ -1124,46 +1110,40 @@ Function* CGT_X86::createFunctionFilter(DFNode* C) { } /* Add a call to the generated function of the child node */ DEBUG(errs() << "\tAdd a call to the generated function of the child node\n"); -// DEBUG(errs() << "Type: " << *C->getGenFunc()->getType() << "\n"); -// CallInst* CI = CallInst::Create(C->getGenFunc(), InputArgs, -// C->getGenFunc()->getName()+".output", RI); - Function *CGenF = C->getGenFuncForTarget(visc::CPU_TARGET); - DEBUG(errs() << "Type: " - << *CGenF->getType() - << "\n"); - CallInst* CI = CallInst::Create(CGenF, - InputArgs, - CGenF->getName()+".output", - RI); + // DEBUG(errs() << "Type: " << *C->getGenFunc()->getType() << "\n"); + // CallInst* CI = CallInst::Create(C->getGenFunc(), InputArgs, + // C->getGenFunc()->getName()+".output", RI); + Function *CGenF = C->getGenFuncForTarget(hpvm::CPU_TARGET); + DEBUG(errs() << "Type: " << *CGenF->getType() << "\n"); + CallInst *CI = + CallInst::Create(CGenF, InputArgs, CGenF->getName() + ".output", RI); /* Add runtime API calls to push 
output for each of the streaming outputs */ // FIXME: Assumption // All edges between siblings are streaming edges - DEBUG(errs() << "\tAdd runtime API calls to push output for each of the streaming outputs\n"); - for (unsigned i=0; i< numOutputs; i++) { + DEBUG(errs() << "\tAdd runtime API calls to push output for each of the " + "streaming outputs\n"); + for (unsigned i = 0; i < numOutputs; i++) { // Extract output - ExtractValueInst* EI = ExtractValueInst::Create(CI, ArrayRef<unsigned>(i), - "",RI); + ExtractValueInst *EI = + ExtractValueInst::Create(CI, ArrayRef<unsigned>(i), "", RI); // Convert to i64 - CastInst* BI; - if(EI->getType()->isPointerTy()) - BI = CastInst::Create(CastInst::PtrToInt,EI, - Type::getInt64Ty(CF_Pipeline->getContext()), - "", - RI); + CastInst *BI; + if (EI->getType()->isPointerTy()) + BI = + CastInst::Create(CastInst::PtrToInt, EI, + Type::getInt64Ty(CF_Pipeline->getContext()), "", RI); else - BI = CastInst::CreateIntegerCast(EI, Type::getInt64Ty(CF_Pipeline->getContext()), - false, "", RI); + BI = CastInst::CreateIntegerCast( + EI, Type::getInt64Ty(CF_Pipeline->getContext()), false, "", RI); // Push to Output buffer - Value* bufferOutArgs[] = {OutputArgs[i], BI}; - CallInst::Create(llvm_visc_bufferPush, - ArrayRef<Value*>(bufferOutArgs, 2), - "", - RI); + Value *bufferOutArgs[] = {OutputArgs[i], BI}; + CallInst::Create(llvm_hpvm_bufferPush, ArrayRef<Value *>(bufferOutArgs, 2), + "", RI); } - // Add loop around the basic block, which exits the loop if isLastInput is false - // Pointers to keep the created loop structure + // Add loop around the basic block, which exits the loop if isLastInput is + // false Pointers to keep the created loop structure BasicBlock *EntryBB, *CondBB, *BodyBB; Instruction *CondStartI = cast<Instruction>(isLastInputPop); Instruction *BodyStartI = cast<Instruction>(Cond)->getNextNode(); @@ -1177,23 +1157,23 @@ Function* CGT_X86::createFunctionFilter(DFNode* C) { return CF_Pipeline; } -void 
CGT_X86::codeGen(DFInternalNode* N) { +void CGT_X86::codeGen(DFInternalNode *N) { // Check if N is root node and its graph is streaming. We do not do codeGen // for Root in such a case - if(N->isRoot() && N->isChildGraphStreaming()) + if (N->isRoot() && N->isChildGraphStreaming()) return; // Check if clone already exists. If it does, it means we have visited this // function before and nothing else needs to be done for this leaf node. -// if(N->getGenFunc() != NULL) -// return; - if (!preferredTargetIncludes(N, visc::CPU_TARGET)) { - errs() << "No CPU hint for node " << N->getFuncPointer()->getName() << - " : skipping it\n"; + // if(N->getGenFunc() != NULL) + // return; + if (!preferredTargetIncludes(N, hpvm::CPU_TARGET)) { + DEBUG(errs() << "No CPU hint for node " << N->getFuncPointer()->getName() + << " : skipping it\n"); return; } - assert(N->getGenFuncForTarget(visc::CPU_TARGET) == NULL && + assert(N->getGenFuncForTarget(hpvm::CPU_TARGET) == NULL && "Error: Visiting a node for which code already generated\n"); // Sort children in topological order before code generation @@ -1202,14 +1182,15 @@ void CGT_X86::codeGen(DFInternalNode* N) { // Only process if all children have a CPU x86 function // Otherwise skip to end bool codeGen = true; - for(DFGraph::children_iterator ci = N->getChildGraph()->begin(), - ce = N->getChildGraph()->end(); ci != ce; ++ci) { - DFNode* C = *ci; + for (DFGraph::children_iterator ci = N->getChildGraph()->begin(), + ce = N->getChildGraph()->end(); + ci != ce; ++ci) { + DFNode *C = *ci; // Skip dummy node call if (C->isDummyNode()) continue; - if (!(C->hasX86GenFuncForTarget(visc::CPU_TARGET))) { + if (!(C->hasX86GenFuncForTarget(hpvm::CPU_TARGET))) { errs() << "No CPU x86 version for child node " << C->getFuncPointer()->getName() << "\n Skip code gen for parent node " @@ -1219,17 +1200,18 @@ void CGT_X86::codeGen(DFInternalNode* N) { } if (codeGen) { - Function* F = N->getFuncPointer(); + Function *F = N->getFuncPointer(); // Create of 
clone of F with no instructions. Only the type is the same as F // without the extra arguments. - Function* F_X86; - + Function *F_X86; + // Clone the function, if we are seeing this function for the first time. We // only need a clone in terms of type. ValueToValueMapTy VMap; - + // Create new function with the same type - F_X86 = Function::Create(F->getFunctionType(), F->getLinkage(), F->getName(), &M); + F_X86 = Function::Create(F->getFunctionType(), F->getLinkage(), + F->getName(), &M); // Loop over the arguments, copying the names of arguments over. Function::arg_iterator dest_iterator = F_X86->arg_begin(); @@ -1242,20 +1224,20 @@ void CGT_X86::codeGen(DFInternalNode* N) { // Add a basic block to this empty function BasicBlock *BB = BasicBlock::Create(F_X86->getContext(), "entry", F_X86); - ReturnInst* RI = ReturnInst::Create(F_X86->getContext(), - UndefValue::get(F_X86->getReturnType()), BB); + ReturnInst *RI = ReturnInst::Create( + F_X86->getContext(), UndefValue::get(F_X86->getReturnType()), BB); - // Add Index and Dim arguments except for the root node and the child graph of - // parent node is not streaming - if(!N->isRoot() && !N->getParent()->isChildGraphStreaming()) + // Add Index and Dim arguments except for the root node and the child graph + // of parent node is not streaming + if (!N->isRoot() && !N->getParent()->isChildGraphStreaming()) F_X86 = addIdxDimArgs(F_X86); BB = &*F_X86->begin(); RI = cast<ReturnInst>(BB->getTerminator()); - - //Add generated function info to DFNode -// N->setGenFunc(F_X86, visc::CPU_TARGET); - N->addGenFunc(F_X86, visc::CPU_TARGET, true); + + // Add generated function info to DFNode + // N->setGenFunc(F_X86, hpvm::CPU_TARGET); + N->addGenFunc(F_X86, hpvm::CPU_TARGET, true); // Loop over the arguments, to create the VMap. 
dest_iterator = F_X86->arg_begin(); @@ -1267,59 +1249,59 @@ void CGT_X86::codeGen(DFInternalNode* N) { } // Iterate over children in topological order - for(DFGraph::children_iterator ci = N->getChildGraph()->begin(), - ce = N->getChildGraph()->end(); ci != ce; ++ci) { - DFNode* C = *ci; + for (DFGraph::children_iterator ci = N->getChildGraph()->begin(), + ce = N->getChildGraph()->end(); + ci != ce; ++ci) { + DFNode *C = *ci; // Skip dummy node call if (C->isDummyNode()) continue; - + // Create calls to CPU function of child node invokeChild_X86(C, F_X86, VMap, RI); - } - + DEBUG(errs() << "*** Generating epilogue code for the function****\n"); // Generate code for output bindings // Get Exit node - DFNode* C = N->getChildGraph()->getExit(); + DFNode *C = N->getChildGraph()->getExit(); // Get OutputType of this node - StructType* OutTy = N->getOutputType(); + StructType *OutTy = N->getOutputType(); Value *retVal = UndefValue::get(F_X86->getReturnType()); // Find all the input edges to exit node - for (unsigned i=0; i < OutTy->getNumElements(); i++) { + for (unsigned i = 0; i < OutTy->getNumElements(); i++) { DEBUG(errs() << "Output Edge " << i << "\n"); // Find the incoming edge at the requested input port - DFEdge* E = C->getInDFEdgeAt(i); - + DFEdge *E = C->getInDFEdgeAt(i); + assert(E && "No Binding for output element!"); // Find the Source DFNode associated with the incoming edge - DFNode* SrcDF = E->getSourceDF(); - - DEBUG(errs() << "Edge source -- " << SrcDF->getFuncPointer()->getName() << "\n"); - + DFNode *SrcDF = E->getSourceDF(); + + DEBUG(errs() << "Edge source -- " << SrcDF->getFuncPointer()->getName() + << "\n"); + // If Source DFNode is a dummyNode, edge is from parent. 
Get the // argument from argument list of this internal node - Value* inputVal; - if(SrcDF->isEntryNode()) { + Value *inputVal; + if (SrcDF->isEntryNode()) { inputVal = getArgumentAt(F_X86, i); - DEBUG(errs() << "Argument "<< i<< " = " << *inputVal << "\n"); - } - else { + DEBUG(errs() << "Argument " << i << " = " << *inputVal << "\n"); + } else { // edge is from a internal node // Check - code should already be generated for this source dfnode - assert(OutputMap.count(SrcDF) - && "Source node call not found. Dependency violation!"); - + assert(OutputMap.count(SrcDF) && + "Source node call not found. Dependency violation!"); + // Find Output Value associated with the Source DFNode using OutputMap - Value* CI = OutputMap[SrcDF]; - + Value *CI = OutputMap[SrcDF]; + // Extract element at source position from this call instruction std::vector<unsigned> IndexList; IndexList.push_back(E->getSourcePosition()); - DEBUG(errs() << "Going to generate ExtarctVal inst from "<< *CI <<"\n"); - ExtractValueInst* EI = ExtractValueInst::Create(CI, IndexList, - "",RI); + DEBUG(errs() << "Going to generate ExtarctVal inst from " << *CI + << "\n"); + ExtractValueInst *EI = ExtractValueInst::Create(CI, IndexList, "", RI); inputVal = EI; } std::vector<unsigned> IdxList; @@ -1328,9 +1310,8 @@ void CGT_X86::codeGen(DFInternalNode* N) { } DEBUG(errs() << "Extracted all\n"); retVal->setName("output"); - ReturnInst* newRI = ReturnInst::Create(F_X86->getContext(), retVal); + ReturnInst *newRI = ReturnInst::Create(F_X86->getContext(), retVal); ReplaceInstWithInst(RI, newRI); - } //-------------------------------------------------------------------------// @@ -1339,88 +1320,83 @@ void CGT_X86::codeGen(DFInternalNode* N) { // If not, we see which version exists, check that it is in fact an x86 // function and save it as the CPU_TARGET function - // TODO: visc_id per node, so we can use this for id for policies + // TODO: hpvm_id per node, so we can use this for id for policies // For now, use 
node function name and change it later - Function *CF = N->getGenFuncForTarget(visc::CPU_TARGET); - Function *GF = N->getGenFuncForTarget(visc::GPU_TARGET); + Function *CF = N->getGenFuncForTarget(hpvm::CPU_TARGET); + Function *GF = N->getGenFuncForTarget(hpvm::GPU_TARGET); - bool CFx86 = N->hasX86GenFuncForTarget(visc::CPU_TARGET); - bool GFx86 = N->hasX86GenFuncForTarget(visc::GPU_TARGET); + bool CFx86 = N->hasX86GenFuncForTarget(hpvm::CPU_TARGET); + bool GFx86 = N->hasX86GenFuncForTarget(hpvm::GPU_TARGET); - DEBUG(errs() << "Before editing\n"); - DEBUG(errs() << "Node: " << N->getFuncPointer()->getName() - << " with tag " << N->getTag() << "\n"); - DEBUG(errs() << "CPU Fun: " << (CF ? CF->getName() : "null" ) << "\n"); + DEBUG(errs() << "Before editing\n"); + DEBUG(errs() << "Node: " << N->getFuncPointer()->getName() << " with tag " + << N->getTag() << "\n"); + DEBUG(errs() << "CPU Fun: " << (CF ? CF->getName() : "null") << "\n"); DEBUG(errs() << "hasx86GenFuncForCPU : " << CFx86 << "\n"); - DEBUG(errs() << "GPU Fun: " << (GF ? GF->getName() : "null" ) << "\n"); + DEBUG(errs() << "GPU Fun: " << (GF ? GF->getName() : "null") << "\n"); DEBUG(errs() << "hasx86GenFuncForGPU : " << GFx86 << "\n"); - - if (N->getTag() == visc::None) { + if (N->getTag() == hpvm::None) { // No code is available for this node. 
This (usually) means that this // node is a node that // - from the accelerator backends has been mapped to an intermediate // node, and thus they have not produced a genFunc - // - a child node had no CPU hint, thus no code gen for CPU could + // - a child node had no CPU hint, thus no code gen for CPU could // take place DEBUG(errs() << "No GenFunc - Skipping CPU code generation for node " - << N->getFuncPointer()->getName() << "\n"); - } else if (viscUtils::isSingleTargetTag(N->getTag())) { + << N->getFuncPointer()->getName() << "\n"); + } else if (hpvmUtils::isSingleTargetTag(N->getTag())) { // There is a single version for this node according to code gen hints. // Therefore, we do not need to check the policy, we simply use the // available implementation, whichever target it is for. // Sanity check - to be removed TODO switch (N->getTag()) { - case visc::CPU_TARGET: - assert(N->getGenFuncForTarget(visc::CPU_TARGET) && ""); - assert(N->hasX86GenFuncForTarget(visc::CPU_TARGET) && ""); - assert(!(N->getGenFuncForTarget(visc::GPU_TARGET)) && ""); - assert(!(N->hasX86GenFuncForTarget(visc::GPU_TARGET)) && ""); - break; - case visc::GPU_TARGET: - assert(!(N->getGenFuncForTarget(visc::CPU_TARGET)) && ""); - assert(!(N->hasX86GenFuncForTarget(visc::CPU_TARGET)) && ""); - assert(N->getGenFuncForTarget(visc::GPU_TARGET) && ""); - assert(N->hasX86GenFuncForTarget(visc::GPU_TARGET) && ""); - break; - default: - assert(false && "Unreachable: we checked that tag was single target!\n"); - break; + case hpvm::CPU_TARGET: + assert(N->getGenFuncForTarget(hpvm::CPU_TARGET) && ""); + assert(N->hasX86GenFuncForTarget(hpvm::CPU_TARGET) && ""); + assert(!(N->getGenFuncForTarget(hpvm::GPU_TARGET)) && ""); + assert(!(N->hasX86GenFuncForTarget(hpvm::GPU_TARGET)) && ""); + break; + case hpvm::GPU_TARGET: + assert(!(N->getGenFuncForTarget(hpvm::CPU_TARGET)) && ""); + assert(!(N->hasX86GenFuncForTarget(hpvm::CPU_TARGET)) && ""); + assert(N->getGenFuncForTarget(hpvm::GPU_TARGET) && ""); + 
assert(N->hasX86GenFuncForTarget(hpvm::GPU_TARGET) && ""); + break; + default: + assert(false && "Unreachable: we checked that tag was single target!\n"); + break; } - - N->addGenFunc(N->getGenFuncForTarget(N->getTag()), - visc::CPU_TARGET, - true); - N->removeGenFuncForTarget(visc::GPU_TARGET); - N->setTag(visc::CPU_TARGET); - - // Sanity checks - to be removed TODO - CF = N->getGenFuncForTarget(visc::CPU_TARGET); - GF = N->getGenFuncForTarget(visc::GPU_TARGET); - - CFx86 = N->hasX86GenFuncForTarget(visc::CPU_TARGET); - GFx86 = N->hasX86GenFuncForTarget(visc::GPU_TARGET); - - DEBUG(errs() << "After editing\n"); - DEBUG(errs() << "Node: " << N->getFuncPointer()->getName() - << " with tag " << N->getTag() << "\n"); - DEBUG(errs() << "CPU Fun: " << (CF ? CF->getName() : "null" ) << "\n"); - DEBUG(errs() << "hasx86GenFuncForCPU : " << CFx86 << "\n"); - DEBUG(errs() << "GPU Fun: " << (GF ? GF->getName() : "null" ) << "\n"); - DEBUG(errs() << "hasx86GenFuncForGPU : " << GFx86 << "\n"); - - } - else { - assert(false && "Multiple tags unsupported!"); - } + N->addGenFunc(N->getGenFuncForTarget(N->getTag()), hpvm::CPU_TARGET, true); + N->removeGenFuncForTarget(hpvm::GPU_TARGET); + N->setTag(hpvm::CPU_TARGET); + + // Sanity checks - to be removed TODO + CF = N->getGenFuncForTarget(hpvm::CPU_TARGET); + GF = N->getGenFuncForTarget(hpvm::GPU_TARGET); + + CFx86 = N->hasX86GenFuncForTarget(hpvm::CPU_TARGET); + GFx86 = N->hasX86GenFuncForTarget(hpvm::GPU_TARGET); + + DEBUG(errs() << "After editing\n"); + DEBUG(errs() << "Node: " << N->getFuncPointer()->getName() << " with tag " + << N->getTag() << "\n"); + DEBUG(errs() << "CPU Fun: " << (CF ? CF->getName() : "null") << "\n"); + DEBUG(errs() << "hasx86GenFuncForCPU : " << CFx86 << "\n"); + DEBUG(errs() << "GPU Fun: " << (GF ? 
GF->getName() : "null") << "\n"); + DEBUG(errs() << "hasx86GenFuncForGPU : " << GFx86 << "\n"); + + } else { + assert(false && "Multiple tags unsupported!"); + } } // Code generation for leaf nodes -void CGT_X86::codeGen(DFLeafNode* N) { +void CGT_X86::codeGen(DFLeafNode *N) { // Skip code generation if it is a dummy node - if(N->isDummyNode()) { + if (N->isDummyNode()) { DEBUG(errs() << "Skipping dummy node\n"); return; } @@ -1437,31 +1413,32 @@ void CGT_X86::codeGen(DFLeafNode* N) { // Check if clone already exists. If it does, it means we have visited this // function before and nothing else needs to be done for this leaf node. -// if(N->getGenFunc() != NULL) -// return; + // if(N->getGenFunc() != NULL) + // return; - if (!preferredTargetIncludes(N, visc::CPU_TARGET)) { - errs() << "No CPU hint for node " << N->getFuncPointer()->getName() << - " : skipping it\n"; + if (!preferredTargetIncludes(N, hpvm::CPU_TARGET)) { + errs() << "No CPU hint for node " << N->getFuncPointer()->getName() + << " : skipping it\n"; switch (N->getTag()) { - case visc::GPU_TARGET: - // A leaf node should not have an x86 function for GPU - // by design of DFG2LLVM_NVPTX backend - assert(!(N->hasX86GenFuncForTarget(visc::GPU_TARGET)) && "Leaf node not expected to have GPU GenFunc"); - break; - default: - break; + case hpvm::GPU_TARGET: + // A leaf node should not have an x86 function for GPU + // by design of DFG2LLVM_NVPTX backend + assert(!(N->hasX86GenFuncForTarget(hpvm::GPU_TARGET)) && + "Leaf node not expected to have GPU GenFunc"); + break; + default: + break; } return; } - assert(N->getGenFuncForTarget(visc::CPU_TARGET) == NULL && + assert(N->getGenFuncForTarget(hpvm::CPU_TARGET) == NULL && "Error: Visiting a node for which code already generated\n"); std::vector<IntrinsicInst *> IItoRemove; - std::vector<std::pair<IntrinsicInst *, Value *> > IItoReplace; + std::vector<std::pair<IntrinsicInst *, Value *>> IItoReplace; BuildDFG::HandleToDFNode Leaf_HandleToDFNodeMap; // Get the 
function associated woth the dataflow node @@ -1477,41 +1454,39 @@ void CGT_X86::codeGen(DFLeafNode* N) { // Add the new argument to the argument list. Add arguments only if the cild // graph of parent node is not streaming - if(!N->getParent()->isChildGraphStreaming()) + if (!N->getParent()->isChildGraphStreaming()) F_X86 = addIdxDimArgs(F_X86); // Add generated function info to DFNode -// N->setGenFunc(F_X86, visc::CPU_TARGET); - N->addGenFunc(F_X86, visc::CPU_TARGET, true); + // N->setGenFunc(F_X86, hpvm::CPU_TARGET); + N->addGenFunc(F_X86, hpvm::CPU_TARGET, true); // Go through the arguments, and any pointer arguments with in attribute need // to have x86_argument_ptr call to get the x86 ptr of the argument // Insert these calls in a new BB which would dominate all other BBs // Create new BB - BasicBlock* EntryBB = &*F_X86->begin(); - BasicBlock* BB = BasicBlock::Create(M.getContext(), "getVISCPtrArgs", F_X86, EntryBB); - BranchInst* Terminator = BranchInst::Create(EntryBB, BB); + BasicBlock *EntryBB = &*F_X86->begin(); + BasicBlock *BB = + BasicBlock::Create(M.getContext(), "getHPVMPtrArgs", F_X86, EntryBB); + BranchInst *Terminator = BranchInst::Create(EntryBB, BB); // Insert calls - for(Function::arg_iterator ai = F_X86->arg_begin(), ae = F_X86->arg_end(); - ai != ae; ++ai) { - if (F_X86->getAttributes().hasAttribute(ai->getArgNo()+1, Attribute::In)) { - assert(ai->getType()->isPointerTy() - && "Only pointer arguments can have visc in/out attributes "); + for (Function::arg_iterator ai = F_X86->arg_begin(), ae = F_X86->arg_end(); + ai != ae; ++ai) { + if (F_X86->getAttributes().hasAttribute(ai->getArgNo() + 1, + Attribute::In)) { + assert(ai->getType()->isPointerTy() && + "Only pointer arguments can have hpvm in/out attributes "); Function::arg_iterator aiNext = ai; ++aiNext; - Argument* size = &*aiNext; - assert(size->getType() == Type::getInt64Ty(M.getContext()) - && "Next argument after a pointer should be an i64 type"); - CastInst* BI = 
BitCastInst::CreatePointerCast(&*ai, - Type::getInt8PtrTy(M.getContext()), - ai->getName()+".i8ptr", - Terminator); - Value* ArgPtrCallArgs[] = {BI, size}; - CallInst::Create(llvm_visc_x86_argument_ptr, - ArrayRef<Value*>(ArgPtrCallArgs, 2), - "", - Terminator); - + Argument *size = &*aiNext; + assert(size->getType() == Type::getInt64Ty(M.getContext()) && + "Next argument after a pointer should be an i64 type"); + CastInst *BI = BitCastInst::CreatePointerCast( + &*ai, Type::getInt8PtrTy(M.getContext()), ai->getName() + ".i8ptr", + Terminator); + Value *ArgPtrCallArgs[] = {BI, size}; + CallInst::Create(llvm_hpvm_x86_argument_ptr, + ArrayRef<Value *>(ArgPtrCallArgs, 2), "", Terminator); } } errs() << *BB << "\n"; @@ -1520,28 +1495,30 @@ void CGT_X86::codeGen(DFLeafNode* N) { for (inst_iterator i = inst_begin(F_X86), e = inst_end(F_X86); i != e; ++i) { Instruction *I = &(*i); DEBUG(errs() << *I << "\n"); - // Leaf nodes should not contain VISC graph intrinsics or launch - assert(!BuildDFG::isViscLaunchIntrinsic(I) && "Launch intrinsic within a dataflow graph!"); - assert(!BuildDFG::isViscGraphIntrinsic(I) && "VISC graph intrinsic within a leaf dataflow node!"); + // Leaf nodes should not contain HPVM graph intrinsics or launch + assert(!BuildDFG::isHPVMLaunchIntrinsic(I) && + "Launch intrinsic within a dataflow graph!"); + assert(!BuildDFG::isHPVMGraphIntrinsic(I) && + "HPVM graph intrinsic within a leaf dataflow node!"); - if (BuildDFG::isViscQueryIntrinsic(I)) { - IntrinsicInst* II = cast<IntrinsicInst>(I); - IntrinsicInst* ArgII; - DFNode* ArgDFNode; + if (BuildDFG::isHPVMQueryIntrinsic(I)) { + IntrinsicInst *II = cast<IntrinsicInst>(I); + IntrinsicInst *ArgII; + DFNode *ArgDFNode; /*********************************************************************** - * Handle VISC Query intrinsics * - ***********************************************************************/ + * Handle HPVM Query intrinsics * + 
***********************************************************************/ switch (II->getIntrinsicID()) { - /**************************** llvm.visc.getNode() *******************/ - case Intrinsic::visc_getNode: { + /**************************** llvm.hpvm.getNode() *******************/ + case Intrinsic::hpvm_getNode: { // add mapping <intrinsic, this node> to the node-specific map Leaf_HandleToDFNodeMap[II] = N; IItoRemove.push_back(II); break; } - /************************* llvm.visc.getParentNode() ****************/ - case Intrinsic::visc_getParentNode: { + /************************* llvm.hpvm.getParentNode() ****************/ + case Intrinsic::hpvm_getParentNode: { // get the parent node of the arg node // get argument node ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts()); @@ -1554,69 +1531,70 @@ void CGT_X86::codeGen(DFLeafNode* N) { IItoRemove.push_back(II); break; } - /*************************** llvm.visc.getNumDims() *****************/ - case Intrinsic::visc_getNumDims: { + /*************************** llvm.hpvm.getNumDims() *****************/ + case Intrinsic::hpvm_getNumDims: { // get node from map // get the appropriate field ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts()); int numOfDim = Leaf_HandleToDFNodeMap[ArgII]->getNumOfDim(); - IntegerType* IntTy = Type::getInt32Ty(M.getContext()); - ConstantInt* numOfDimConstant = ConstantInt::getSigned(IntTy, (int64_t) numOfDim); + IntegerType *IntTy = Type::getInt32Ty(M.getContext()); + ConstantInt *numOfDimConstant = + ConstantInt::getSigned(IntTy, (int64_t)numOfDim); II->replaceAllUsesWith(numOfDimConstant); IItoRemove.push_back(II); break; } - /*********************** llvm.visc.getNodeInstanceID() **************/ - case Intrinsic::visc_getNodeInstanceID_x: - case Intrinsic::visc_getNodeInstanceID_y: - case Intrinsic::visc_getNodeInstanceID_z: { + /*********************** llvm.hpvm.getNodeInstanceID() **************/ + case Intrinsic::hpvm_getNodeInstanceID_x: + case 
Intrinsic::hpvm_getNodeInstanceID_y: + case Intrinsic::hpvm_getNodeInstanceID_z: { ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts()); ArgDFNode = Leaf_HandleToDFNodeMap[ArgII]; // The dfnode argument should be an ancestor of this leaf node or // the leaf node itself int parentLevel = N->getAncestorHops(ArgDFNode); - assert(( parentLevel >= 0 || ArgDFNode == (DFNode*)N ) - && "Invalid DFNode argument to getNodeInstanceID_[xyz]!"); + assert((parentLevel >= 0 || ArgDFNode == (DFNode *)N) && + "Invalid DFNode argument to getNodeInstanceID_[xyz]!"); // Get specified dimension // (dim = 0) => x // (dim = 1) => y // (dim = 2) => z - int dim = (int) (II->getIntrinsicID() - - Intrinsic::visc_getNodeInstanceID_x); - assert((dim >= 0) && (dim < 3) - && "Invalid dimension for getNodeInstanceID_[xyz]. Check Intrinsic ID!"); + int dim = + (int)(II->getIntrinsicID() - Intrinsic::hpvm_getNodeInstanceID_x); + assert((dim >= 0) && (dim < 3) && + "Invalid dimension for getNodeInstanceID_[xyz]. Check Intrinsic " + "ID!"); // For immediate ancestor, use the extra argument introduced in // F_X86 int numParamsF = F->getFunctionType()->getNumParams(); int numParamsF_X86 = F_X86->getFunctionType()->getNumParams(); - assert((numParamsF_X86 - numParamsF == 6) - && "Difference of arguments between function and its clone is not 6!"); + assert( + (numParamsF_X86 - numParamsF == 6) && + "Difference of arguments between function and its clone is not 6!"); - if(parentLevel == 0) { + if (parentLevel == 0) { // Case when the query is for this node itself - unsigned offset = 3 + (3-dim); + unsigned offset = 3 + (3 - dim); // Traverse argument list of F_X86 in reverse order to find the // correct index or dim argument. - Argument* indexVal = getArgumentFromEnd(F_X86, offset); + Argument *indexVal = getArgumentFromEnd(F_X86, offset); assert(indexVal && "Index argument not found. 
Invalid offset!"); DEBUG(errs() << *II << " replaced with " << *indexVal << "\n"); II->replaceAllUsesWith(indexVal); IItoRemove.push_back(II); - } - else { + } else { // Case when query is for an ancestor - Value* args[] = { - ConstantInt::get(Type::getInt32Ty(II->getContext()), parentLevel), - ConstantInt::get(Type::getInt32Ty(II->getContext()), dim) - }; - CallInst* CI = CallInst::Create(llvm_visc_x86_getDimInstance, - ArrayRef<Value*>(args, 2), + Value *args[] = { + ConstantInt::get(Type::getInt32Ty(II->getContext()), parentLevel), + ConstantInt::get(Type::getInt32Ty(II->getContext()), dim)}; + CallInst *CI = CallInst::Create(llvm_hpvm_x86_getDimInstance, + ArrayRef<Value *>(args, 2), "nodeInstanceID", II); DEBUG(errs() << *II << " replaced with " << *CI << "\n"); II->replaceAllUsesWith(CI); @@ -1624,10 +1602,10 @@ void CGT_X86::codeGen(DFLeafNode* N) { } break; } - /********************** llvm.visc.getNumNodeInstances() *************/ - case Intrinsic::visc_getNumNodeInstances_x: - case Intrinsic::visc_getNumNodeInstances_y: - case Intrinsic::visc_getNumNodeInstances_z: { + /********************** llvm.hpvm.getNumNodeInstances() *************/ + case Intrinsic::hpvm_getNumNodeInstances_x: + case Intrinsic::hpvm_getNumNodeInstances_y: + case Intrinsic::hpvm_getNumNodeInstances_z: { ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts()); ArgDFNode = Leaf_HandleToDFNodeMap[ArgII]; @@ -1635,46 +1613,46 @@ void CGT_X86::codeGen(DFLeafNode* N) { // The dfnode argument should be an ancestor of this leaf node or // the leaf node itself int parentLevel = N->getAncestorHops(ArgDFNode); - assert(( parentLevel >= 0 || ArgDFNode == (DFNode*)N ) - && "Invalid DFNode argument to getNodeInstanceID_[xyz]!"); + assert((parentLevel >= 0 || ArgDFNode == (DFNode *)N) && + "Invalid DFNode argument to getNodeInstanceID_[xyz]!"); // Get specified dimension // (dim = 0) => x // (dim = 1) => y // (dim = 2) => z - int dim = (int) (II->getIntrinsicID() - - 
Intrinsic::visc_getNumNodeInstances_x); - assert((dim >= 0) && (dim < 3) - && "Invalid dimension for getNumNodeInstances_[xyz]. Check Intrinsic ID!"); + int dim = + (int)(II->getIntrinsicID() - Intrinsic::hpvm_getNumNodeInstances_x); + assert((dim >= 0) && (dim < 3) && + "Invalid dimension for getNumNodeInstances_[xyz]. Check " + "Intrinsic ID!"); // For immediate ancestor, use the extra argument introduced in // F_X86 int numParamsF = F->getFunctionType()->getNumParams(); int numParamsF_X86 = F_X86->getFunctionType()->getNumParams(); - assert((numParamsF_X86 - numParamsF == 6) - && "Difference of arguments between function and its clone is not 6!"); + assert( + (numParamsF_X86 - numParamsF == 6) && + "Difference of arguments between function and its clone is not 6!"); - if(parentLevel == 0) { + if (parentLevel == 0) { // Case when the query is for this node itself unsigned offset = 3 - dim; // Traverse argument list of F_X86 in reverse order to find the // correct index or dim argument. - Argument* limitVal = getArgumentFromEnd(F_X86, offset); + Argument *limitVal = getArgumentFromEnd(F_X86, offset); assert(limitVal && "Limit argument not found. 
Invalid offset!"); - DEBUG(errs() << *II << " replaced with " << *limitVal << "\n"); + DEBUG(errs() << *II << " replaced with " << *limitVal << "\n"); II->replaceAllUsesWith(limitVal); IItoRemove.push_back(II); - } - else { + } else { // Case when query is from the ancestor - Value* args[] = { - ConstantInt::get(Type::getInt32Ty(II->getContext()), parentLevel), - ConstantInt::get(Type::getInt32Ty(II->getContext()), dim) - }; - CallInst* CI = CallInst::Create(llvm_visc_x86_getDimLimit, - ArrayRef<Value*>(args, 2), + Value *args[] = { + ConstantInt::get(Type::getInt32Ty(II->getContext()), parentLevel), + ConstantInt::get(Type::getInt32Ty(II->getContext()), dim)}; + CallInst *CI = CallInst::Create(llvm_hpvm_x86_getDimLimit, + ArrayRef<Value *>(args, 2), "numNodeInstances", II); DEBUG(errs() << *II << " replaced with " << *CI << "\n"); II->replaceAllUsesWith(CI); @@ -1684,19 +1662,16 @@ void CGT_X86::codeGen(DFLeafNode* N) { break; } default: - DEBUG(errs() << "Found unknown intrinsic with ID = " << - II->getIntrinsicID() << "\n"); - assert(false && "Unknown VISC Intrinsic!"); + DEBUG(errs() << "Found unknown intrinsic with ID = " + << II->getIntrinsicID() << "\n"); + assert(false && "Unknown HPVM Intrinsic!"); break; } } else { } - } - - // Remove them in reverse order for (std::vector<IntrinsicInst *>::iterator i = IItoRemove.begin(); i != IItoRemove.end(); ++i) { @@ -1710,8 +1685,7 @@ void CGT_X86::codeGen(DFLeafNode* N) { } // End of namespace char DFG2LLVM_X86::ID = 0; -static RegisterPass<DFG2LLVM_X86> X("dfg2llvm-x86", - "Dataflow Graph to LLVM for X86 backend", - false /* does not modify the CFG */, - true /* transformation, not just analysis */); - +static RegisterPass<DFG2LLVM_X86> + X("dfg2llvm-x86", "Dataflow Graph to LLVM for X86 backend", + false /* does not modify the CFG */, + true /* transformation, not just analysis */); diff --git a/hpvm/lib/Transforms/GenVISC/CMakeLists.txt b/hpvm/lib/Transforms/GenHPVM/CMakeLists.txt similarity index 74% rename 
from hpvm/lib/Transforms/GenVISC/CMakeLists.txt rename to hpvm/lib/Transforms/GenHPVM/CMakeLists.txt index ed087f63b4933a33792d7cd773acdf8fab1ac8e3..967766e7058c1ef8bcc1414afb7ff0087e3ce188 100644 --- a/hpvm/lib/Transforms/GenVISC/CMakeLists.txt +++ b/hpvm/lib/Transforms/GenHPVM/CMakeLists.txt @@ -2,9 +2,9 @@ if(WIN32 OR CYGWIN) set(LLVM_LINK_COMPONENTS Core Support) endif() -add_llvm_library( LLVMGenVISC +add_llvm_library( LLVMGenHPVM MODULE - GenVISC.cpp + GenHPVM.cpp DEPENDS intrinsics_gen diff --git a/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp b/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp new file mode 100644 index 0000000000000000000000000000000000000000..738b39905b885aa42bc861e3a19c3bdf9c65668e --- /dev/null +++ b/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp @@ -0,0 +1,894 @@ +//=== GenHPVM.cpp - Implements "Hierarchical Dataflow Graph Builder Pass" ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "genhpvm" +#include "GenHPVM/GenHPVM.h" + +#include "SupportHPVM/HPVMHint.h" +#include "SupportHPVM/HPVMUtils.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" + +#define TIMER(X) \ + do { \ + if (HPVMTimer) { \ + X; \ + } \ + } while (0) + +using namespace llvm; +using namespace hpvmUtils; + +// HPVM Command line option to use timer or not +static cl::opt<bool> HPVMTimer("hpvm-timers-gen", + cl::desc("Enable GenHPVM timer")); + +namespace genhpvm { + +// Helper Functions + +static inline ConstantInt *getTimerID(Module &, enum hpvm_TimerID); +static Function *transformReturnTypeToStruct(Function *F); +static Type *getReturnTypeFromReturnInst(Function *F); + +// Check if the dummy function call is a __hpvm__node call +#define IS_HPVM_CALL(callName) \ + static bool isHPVMCall_##callName(Instruction *I) { \ + if (!isa<CallInst>(I)) \ + return false; \ + CallInst *CI = cast<CallInst>(I); \ + return (CI->getCalledValue()->stripPointerCasts()->getName()) \ + .equals("__hpvm__" #callName); \ + } + +static void ReplaceCallWithIntrinsic(Instruction *I, Intrinsic::ID IntrinsicID, + std::vector<Instruction *> *Erase) { + // Check if the instruction is Call Instruction + assert(isa<CallInst>(I) && "Expecting CallInst"); + CallInst *CI = cast<CallInst>(I); + DEBUG(errs() << "Found call: " << *CI << "\n"); + + // Find the correct intrinsic call + Module *M = CI->getParent()->getParent()->getParent(); + Function *F; + std::vector<Type *> ArgTypes; + std::vector<Value *> 
args; + if (Intrinsic::isOverloaded(IntrinsicID)) { + // This is an overloaded intrinsic. The types must exactly match. Get the + // argument types + for (unsigned i = 0; i < CI->getNumArgOperands(); i++) { + ArgTypes.push_back(CI->getArgOperand(i)->getType()); + args.push_back(CI->getArgOperand(i)); + } + F = Intrinsic::getDeclaration(M, IntrinsicID, ArgTypes); + DEBUG(errs() << *F << "\n"); + } else { // Non-overloaded intrinsic + F = Intrinsic::getDeclaration(M, IntrinsicID); + FunctionType *FTy = F->getFunctionType(); + DEBUG(errs() << *F << "\n"); + + // Create argument list + assert(CI->getNumArgOperands() == FTy->getNumParams() && + "Number of arguments of call do not match with Intrinsic"); + for (unsigned i = 0; i < CI->getNumArgOperands(); i++) { + Value *V = CI->getArgOperand(i); + // Either the type should match or both should be of pointer type + assert((V->getType() == FTy->getParamType(i) || + (V->getType()->isPointerTy() && + FTy->getParamType(i)->isPointerTy())) && + "Dummy function call argument does not match with Intrinsic " + "argument!"); + // If the types do not match, then both must be pointer type and pointer + // cast needs to be performed + if (V->getType() != FTy->getParamType(i)) { + V = CastInst::CreatePointerCast(V, FTy->getParamType(i), "", CI); + } + args.push_back(V); + } + } + // Insert call instruction + CallInst *Inst = CallInst::Create( + F, args, F->getReturnType()->isVoidTy() ? 
"" : CI->getName(), CI); + + DEBUG(errs() << "\tSubstitute with: " << *Inst << "\n"); + + CI->replaceAllUsesWith(Inst); + // If the previous instruction needs to be erased, insert it in the vector + // Erased + if (Erase != NULL) + Erase->push_back(CI); +} + +IS_HPVM_CALL(launch) /* Exists but not required */ +IS_HPVM_CALL(edge) /* Exists but not required */ +IS_HPVM_CALL(createNodeND) +// IS_HPVM_CALL(createNode) +// IS_HPVM_CALL(createNode1D) +// IS_HPVM_CALL(createNode2D) +// IS_HPVM_CALL(createNode3D) +IS_HPVM_CALL(bindIn) +IS_HPVM_CALL(bindOut) +IS_HPVM_CALL(push) +IS_HPVM_CALL(pop) +IS_HPVM_CALL(getNode) +IS_HPVM_CALL(getParentNode) +IS_HPVM_CALL(barrier) +IS_HPVM_CALL(malloc) +IS_HPVM_CALL(return ) +IS_HPVM_CALL(getNodeInstanceID_x) +IS_HPVM_CALL(getNodeInstanceID_y) +IS_HPVM_CALL(getNodeInstanceID_z) +IS_HPVM_CALL(getNumNodeInstances_x) +IS_HPVM_CALL(getNumNodeInstances_y) +IS_HPVM_CALL(getNumNodeInstances_z) +// Atomics +IS_HPVM_CALL(atomic_cmpxchg) +IS_HPVM_CALL(atomic_add) +IS_HPVM_CALL(atomic_sub) +IS_HPVM_CALL(atomic_xchg) +IS_HPVM_CALL(atomic_inc) +IS_HPVM_CALL(atomic_dec) +IS_HPVM_CALL(atomic_min) +IS_HPVM_CALL(atomic_max) +IS_HPVM_CALL(atomic_umin) +IS_HPVM_CALL(atomic_umax) +IS_HPVM_CALL(atomic_and) +IS_HPVM_CALL(atomic_or) +IS_HPVM_CALL(atomic_xor) +// Misc Fn +IS_HPVM_CALL(floor) +IS_HPVM_CALL(rsqrt) +IS_HPVM_CALL(sqrt) +IS_HPVM_CALL(sin) +IS_HPVM_CALL(cos) + +IS_HPVM_CALL(init) +IS_HPVM_CALL(cleanup) +IS_HPVM_CALL(wait) +IS_HPVM_CALL(trackMemory) +IS_HPVM_CALL(untrackMemory) +IS_HPVM_CALL(requestMemory) +IS_HPVM_CALL(attributes) +IS_HPVM_CALL(hint) + +// Return the constant integer represented by value V +static unsigned getNumericValue(Value *V) { + assert( + isa<ConstantInt>(V) && + "Value indicating the number of arguments should be a constant integer"); + return cast<ConstantInt>(V)->getZExtValue(); +} + +// Take the __hpvm__return instruction and generate code for combining the +// values being returned into a struct and returning it. 
+// The first operand is the number of returned values +static Value *genCodeForReturn(CallInst *CI) { + LLVMContext &Ctx = CI->getContext(); + assert(isHPVMCall_return(CI) && "__hpvm__return instruction expected!"); + + // Parse the dummy function call here + assert(CI->getNumArgOperands() > 0 && + "Too few arguments for __hpvm_return call!\n"); + unsigned numRetVals = getNumericValue(CI->getArgOperand(0)); + + assert(CI->getNumArgOperands() - 1 == numRetVals && + "Too few arguments for __hpvm_return call!\n"); + DEBUG(errs() << "\tNum of return values = " << numRetVals << "\n"); + + std::vector<Type *> ArgTypes; + for (unsigned i = 1; i < CI->getNumArgOperands(); i++) { + ArgTypes.push_back(CI->getArgOperand(i)->getType()); + } + Twine outTyName = "struct.out." + CI->getParent()->getParent()->getName(); + StructType *RetTy = StructType::create(Ctx, ArgTypes, outTyName.str(), true); + + InsertValueInst *IV = InsertValueInst::Create( + UndefValue::get(RetTy), CI->getArgOperand(1), 0, "returnStruct", CI); + DEBUG(errs() << "Code generation for return:\n"); + DEBUG(errs() << *IV << "\n"); + + for (unsigned i = 2; i < CI->getNumArgOperands(); i++) { + IV = InsertValueInst::Create(IV, CI->getArgOperand(i), i - 1, IV->getName(), + CI); + DEBUG(errs() << *IV << "\n"); + } + + return IV; +} + +// Analyse the attribute call for this function. Add the in and out +// attributes to pointer parameters. 
+static void handleHPVMAttributes(Function *F, CallInst *CI) { + DEBUG(errs() << "Kernel before adding In/Out HPVM attributes:\n" + << *F << "\n"); + // Parse the dummy function call here + unsigned offset = 0; + // Find number of In pointers + assert(CI->getNumArgOperands() > offset && + "Too few arguments for __hpvm__attributes call!"); + unsigned numInPtrs = getNumericValue(CI->getArgOperand(offset)); + DEBUG(errs() << "\tNum of in pointers = " << numInPtrs << "\n"); + + for (unsigned i = offset + 1; i < offset + 1 + numInPtrs; i++) { + Value *V = CI->getArgOperand(i); + if (Argument *arg = dyn_cast<Argument>(V)) { + F->addAttribute(1 + arg->getArgNo(), Attribute::In); + } else { + DEBUG(errs() << "Invalid argument to __hpvm__attribute: " << *V << "\n"); + llvm_unreachable( + "Only pointer arguments can be passed to __hpvm__attributes call"); + } + } + // Find number of Out Pointers + offset += 1 + numInPtrs; + assert(CI->getNumArgOperands() > offset && + "Too few arguments for __hpvm__attributes call!"); + unsigned numOutPtrs = getNumericValue(CI->getOperand(offset)); + DEBUG(errs() << "\tNum of out Pointers = " << numOutPtrs << "\n"); + for (unsigned i = offset + 1; i < offset + 1 + numOutPtrs; i++) { + Value *V = CI->getArgOperand(i); + if (Argument *arg = dyn_cast<Argument>(V)) { + F->addAttribute(1 + arg->getArgNo(), Attribute::Out); + } else { + DEBUG(errs() << "Invalid argument to __hpvm__attribute: " << *V << "\n"); + llvm_unreachable( + "Only pointer arguments can be passed to __hpvm__attributes call"); + } + } + DEBUG(errs() << "Kernel after adding In/Out HPVM attributes:\n" + << *F << "\n"); +} + +// Public Functions of GenHPVM pass +bool GenHPVM::runOnModule(Module &M) { + DEBUG(errs() << "\nGENHPVM PASS\n"); + this->M = &M; + + // Load Runtime API Module + SMDiagnostic Err; + + char *LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT"); + assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!"); + + Twine llvmSrcRoot = LLVM_SRC_ROOT; + Twine 
runtimeAPI = + llvmSrcRoot + "/../build/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc"; + DEBUG(errs() << llvmSrcRoot << "\n"); + + std::unique_ptr<Module> runtimeModule = + parseIRFile(runtimeAPI.str(), Err, M.getContext()); + + if (runtimeModule == NULL) { + DEBUG(errs() << Err.getMessage() << " " << runtimeAPI << "\n"); + assert(false && "couldn't parse runtime"); + } else + DEBUG(errs() << "Successfully loaded hpvm-rt API module\n"); + + llvm_hpvm_initializeTimerSet = M.getOrInsertFunction( + "llvm_hpvm_initializeTimerSet", + runtimeModule->getFunction("llvm_hpvm_initializeTimerSet") + ->getFunctionType()); + // DEBUG(errs() << *llvm_hpvm_initializeTimerSet); + + llvm_hpvm_switchToTimer = M.getOrInsertFunction( + "llvm_hpvm_switchToTimer", + runtimeModule->getFunction("llvm_hpvm_switchToTimer")->getFunctionType()); + // DEBUG(errs() << *llvm_hpvm_switchToTimer); + + llvm_hpvm_printTimerSet = M.getOrInsertFunction( + "llvm_hpvm_printTimerSet", + runtimeModule->getFunction("llvm_hpvm_printTimerSet")->getFunctionType()); + // DEBUG(errs() << *llvm_hpvm_printTimerSet); + + // Insert init context in main + DEBUG(errs() << "Locate __hpvm__init()\n"); + Function *VI = M.getFunction("__hpvm__init"); + assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once"); + Instruction *I = cast<Instruction>(*VI->user_begin()); + + DEBUG(errs() << "Initialize Timer Set\n"); + initializeTimerSet(I); + switchToTimer(hpvm_TimerID_NONE, I); + + // Insert print instruction at hpvm exit + DEBUG(errs() << "Locate __hpvm__cleanup()\n"); + Function *VC = M.getFunction("__hpvm__cleanup"); + assert(VC->getNumUses() == 1 && "__hpvm__cleanup should only be used once"); + I = cast<Instruction>(*VC->user_begin()); + printTimerSet(I); + + DEBUG(errs() << "-------- Searching for launch sites ----------\n"); + + std::vector<Instruction *> toBeErased; + std::vector<Function *> functions; + + for (auto &F : M) + functions.push_back(&F); + + // Iterate over all functions in the module + for 
(Function *f : functions) { + DEBUG(errs() << "Function: " << f->getName() << "\n"); + + // List with the required additions in the function's return type + std::vector<Type *> FRetTypes; + + enum mutateTypeCause { + mtc_None, + mtc_BIND, + mtc_RETURN, + mtc_NUM_CAUSES + } bind; + bind = mutateTypeCause::mtc_None; + + // Iterate over all the instructions in this function + for (inst_iterator i = inst_begin(f), e = inst_end(f); i != e; ++i) { + Instruction *I = &*i; // Grab pointer to Instruction + // If not a call instruction, move to next instruction + if (!isa<CallInst>(I)) + continue; + + CallInst *CI = cast<CallInst>(I); + LLVMContext &Ctx = CI->getContext(); + + if (isHPVMCall_init(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_init, &toBeErased); + } + if (isHPVMCall_cleanup(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_cleanup, &toBeErased); + } + if (isHPVMCall_wait(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_wait, &toBeErased); + } + if (isHPVMCall_trackMemory(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_trackMemory, &toBeErased); + } + if (isHPVMCall_untrackMemory(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_untrackMemory, &toBeErased); + } + if (isHPVMCall_requestMemory(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_requestMemory, &toBeErased); + } + if (isHPVMCall_hint(I)) { + assert(isa<ConstantInt>(CI->getArgOperand(0)) && + "Argument to hint must be constant integer!"); + ConstantInt *hint = cast<ConstantInt>(CI->getArgOperand(0)); + + hpvm::Target t = (hpvm::Target)hint->getZExtValue(); + addHint(CI->getParent()->getParent(), t); + DEBUG(errs() << "Found hpvm hint call: " << *CI << "\n"); + toBeErased.push_back(CI); + } + if (isHPVMCall_launch(I)) { + Function *LaunchF = + Intrinsic::getDeclaration(&M, Intrinsic::hpvm_launch); + DEBUG(errs() << *LaunchF << "\n"); + // Get i8* cast to function pointer + Function *graphFunc = cast<Function>(CI->getArgOperand(1)); + graphFunc = 
transformReturnTypeToStruct(graphFunc); + Constant *F = + ConstantExpr::getPointerCast(graphFunc, Type::getInt8PtrTy(Ctx)); + assert( + F && + "Function invoked by HPVM launch has to be define and constant."); + + ConstantInt *Op = cast<ConstantInt>(CI->getArgOperand(0)); + assert(Op && "HPVM launch's streaming argument is a constant value."); + Value *isStreaming = Op->isZero() ? ConstantInt::getFalse(Ctx) + : ConstantInt::getTrue(Ctx); + + auto *ArgTy = dyn_cast<PointerType>(CI->getArgOperand(2)->getType()); + assert(ArgTy && "HPVM launch argument should be pointer type."); + Value *Arg = CI->getArgOperand(2); + if (!ArgTy->getElementType()->isIntegerTy(8)) + Arg = BitCastInst::CreatePointerCast(CI->getArgOperand(2), + Type::getInt8PtrTy(Ctx), "", CI); + Value *LaunchArgs[] = {F, Arg, isStreaming}; + CallInst *LaunchInst = CallInst::Create( + LaunchF, ArrayRef<Value *>(LaunchArgs, 3), "graphID", CI); + DEBUG(errs() << "Found hpvm launch call: " << *CI << "\n"); + DEBUG(errs() << "\tSubstitute with: " << *LaunchInst << "\n"); + CI->replaceAllUsesWith(LaunchInst); + toBeErased.push_back(CI); + } + if (isHPVMCall_push(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_push, &toBeErased); + } + if (isHPVMCall_pop(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_pop, &toBeErased); + } + if (isHPVMCall_createNodeND(I)) { + assert(CI->getNumArgOperands() > 0 && + "Too few arguments for __hpvm__createNodeND call"); + unsigned numDims = getNumericValue(CI->getArgOperand(0)); + // We need as meny dimension argments are there are dimensions + assert(CI->getNumArgOperands() - 2 == numDims && + "Too few arguments for __hpvm_createNodeND call!\n"); + + Function *CreateNodeF; + switch (numDims) { + case 0: + CreateNodeF = + Intrinsic::getDeclaration(&M, Intrinsic::hpvm_createNode); + break; + case 1: + CreateNodeF = + Intrinsic::getDeclaration(&M, Intrinsic::hpvm_createNode1D); + break; + case 2: + CreateNodeF = + Intrinsic::getDeclaration(&M, Intrinsic::hpvm_createNode2D); 
+ break; + case 3: + CreateNodeF = + Intrinsic::getDeclaration(&M, Intrinsic::hpvm_createNode3D); + break; + default: + llvm_unreachable("Unsupported number of dimensions\n"); + break; + } + DEBUG(errs() << *CreateNodeF << "\n"); + DEBUG(errs() << *I << "\n"); + DEBUG(errs() << "in " << I->getParent()->getParent()->getName() + << "\n"); + + // Get i8* cast to function pointer + Function *graphFunc = cast<Function>(CI->getArgOperand(1)); + graphFunc = transformReturnTypeToStruct(graphFunc); + Constant *F = + ConstantExpr::getPointerCast(graphFunc, Type::getInt8PtrTy(Ctx)); + + CallInst *CreateNodeInst; + switch (numDims) { + case 0: + CreateNodeInst = CallInst::Create(CreateNodeF, ArrayRef<Value *>(F), + graphFunc->getName() + ".node", CI); + break; + case 1: { + assert((CI->getArgOperand(2)->getType() == Type::getInt64Ty(Ctx)) && + "CreateNodeND dimension argument, 2, expected to be i64\n"); + Value *CreateNodeArgs[] = {F, CI->getArgOperand(2)}; + CreateNodeInst = CallInst::Create( + CreateNodeF, ArrayRef<Value *>(CreateNodeArgs, 2), + graphFunc->getName() + ".node", CI); + } break; + case 2: { + assert((CI->getArgOperand(2)->getType() == Type::getInt64Ty(Ctx)) && + "CreateNodeND dimension argument, 2, expected to be i64\n"); + assert((CI->getArgOperand(3)->getType() == Type::getInt64Ty(Ctx)) && + "CreateNodeND dimension argument, 3, expected to be i64\n"); + Value *CreateNodeArgs[] = {F, CI->getArgOperand(2), + CI->getArgOperand(3)}; + CreateNodeInst = CallInst::Create( + CreateNodeF, ArrayRef<Value *>(CreateNodeArgs, 3), + graphFunc->getName() + ".node", CI); + } break; + case 3: { + assert((CI->getArgOperand(2)->getType() == Type::getInt64Ty(Ctx)) && + "CreateNodeND dimension argument, 2, expected to be i64\n"); + assert((CI->getArgOperand(3)->getType() == Type::getInt64Ty(Ctx)) && + "CreateNodeND dimension argument, 3, expected to be i64\n"); + assert((CI->getArgOperand(4)->getType() == Type::getInt64Ty(Ctx)) && + "CreateNodeND dimension argument, 4, expected 
to be i64\n"); + Value *CreateNodeArgs[] = {F, CI->getArgOperand(2), + CI->getArgOperand(3), + CI->getArgOperand(4)}; + CreateNodeInst = CallInst::Create( + CreateNodeF, ArrayRef<Value *>(CreateNodeArgs, 4), + graphFunc->getName() + ".node", CI); + } break; + default: + llvm_unreachable( + "Impossible path: number of dimensions is 0, 1, 2, 3\n"); + break; + } + + DEBUG(errs() << "Found hpvm createNode call: " << *CI << "\n"); + DEBUG(errs() << "\tSubstitute with: " << *CreateNodeInst << "\n"); + CI->replaceAllUsesWith(CreateNodeInst); + toBeErased.push_back(CI); + } + + if (isHPVMCall_edge(I)) { + Function *EdgeF = + Intrinsic::getDeclaration(&M, Intrinsic::hpvm_createEdge); + DEBUG(errs() << *EdgeF << "\n"); + ConstantInt *Op = cast<ConstantInt>(CI->getArgOperand(5)); + ConstantInt *EdgeTypeOp = cast<ConstantInt>(CI->getArgOperand(2)); + assert(Op && EdgeTypeOp && + "Arguments of CreateEdge are not constant integers."); + Value *isStreaming = Op->isZero() ? ConstantInt::getFalse(Ctx) + : ConstantInt::getTrue(Ctx); + Value *isAllToAll = EdgeTypeOp->isZero() ? ConstantInt::getFalse(Ctx) + : ConstantInt::getTrue(Ctx); + Value *EdgeArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1), + isAllToAll, CI->getArgOperand(3), + CI->getArgOperand(4), isStreaming}; + CallInst *EdgeInst = CallInst::Create( + EdgeF, ArrayRef<Value *>(EdgeArgs, 6), "output", CI); + DEBUG(errs() << "Found hpvm edge call: " << *CI << "\n"); + DEBUG(errs() << "\tSubstitute with: " << *EdgeInst << "\n"); + CI->replaceAllUsesWith(EdgeInst); + toBeErased.push_back(CI); + } + if (isHPVMCall_bindIn(I)) { + Function *BindInF = + Intrinsic::getDeclaration(&M, Intrinsic::hpvm_bind_input); + DEBUG(errs() << *BindInF << "\n"); + // Check if this is a streaming bind or not + ConstantInt *Op = cast<ConstantInt>(CI->getArgOperand(3)); + assert(Op && "Streaming argument for bind in intrinsic should be a " + "constant integer."); + Value *isStreaming = Op->isZero() ? 
ConstantInt::getFalse(Ctx) + : ConstantInt::getTrue(Ctx); + Value *BindInArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), isStreaming}; + CallInst *BindInInst = + CallInst::Create(BindInF, ArrayRef<Value *>(BindInArgs, 4), "", CI); + DEBUG(errs() << "Found hpvm bindIn call: " << *CI << "\n"); + DEBUG(errs() << "\tSubstitute with: " << *BindInInst << "\n"); + CI->replaceAllUsesWith(BindInInst); + toBeErased.push_back(CI); + } + if (isHPVMCall_bindOut(I)) { + Function *BindOutF = + Intrinsic::getDeclaration(&M, Intrinsic::hpvm_bind_output); + DEBUG(errs() << *BindOutF << "\n"); + // Check if this is a streaming bind or not + ConstantInt *Op = cast<ConstantInt>(CI->getArgOperand(3)); + assert(Op && "Streaming argument for bind out intrinsic should be a " + "constant integer."); + Value *isStreaming = Op->isZero() ? ConstantInt::getFalse(Ctx) + : ConstantInt::getTrue(Ctx); + Value *BindOutArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), isStreaming}; + CallInst *BindOutInst = CallInst::Create( + BindOutF, ArrayRef<Value *>(BindOutArgs, 4), "", CI); + DEBUG(errs() << "Found hpvm bindOut call: " << *CI << "\n"); + DEBUG(errs() << "\tSubstitute with: " << *BindOutInst << "\n"); + + DEBUG(errs() << "Fixing the return type of the function\n"); + // FIXME: What if the child node function has not been visited already. + // i.e., it's return type has not been fixed. 
+ Function *F = I->getParent()->getParent(); + DEBUG(errs() << F->getName() << "\n";); + IntrinsicInst *NodeIntrinsic = + cast<IntrinsicInst>(CI->getArgOperand(0)); + assert(NodeIntrinsic && + "Instruction value in bind out is not a create node intrinsic."); + DEBUG(errs() << "Node intrinsic: " << *NodeIntrinsic << "\n"); + assert( + (NodeIntrinsic->getIntrinsicID() == Intrinsic::hpvm_createNode || + NodeIntrinsic->getIntrinsicID() == Intrinsic::hpvm_createNode1D || + NodeIntrinsic->getIntrinsicID() == Intrinsic::hpvm_createNode2D || + NodeIntrinsic->getIntrinsicID() == Intrinsic::hpvm_createNode3D) && + "Instruction value in bind out is not a create node intrinsic."); + Function *ChildF = cast<Function>( + NodeIntrinsic->getArgOperand(0)->stripPointerCasts()); + DEBUG(errs() << ChildF->getName() << "\n";); + int srcpos = cast<ConstantInt>(CI->getArgOperand(1))->getSExtValue(); + int destpos = cast<ConstantInt>(CI->getArgOperand(2))->getSExtValue(); + StructType *ChildReturnTy = cast<StructType>(ChildF->getReturnType()); + + Type *ReturnType = F->getReturnType(); + DEBUG(errs() << *ReturnType << "\n";); + assert((ReturnType->isVoidTy() || isa<StructType>(ReturnType)) && + "Return type should either be a struct or void type!"); + + FRetTypes.insert(FRetTypes.begin() + destpos, + ChildReturnTy->getElementType(srcpos)); + assert(((bind == mutateTypeCause::mtc_BIND) || + (bind == mutateTypeCause::mtc_None)) && + "Both bind_out and hpvm_return detected"); + bind = mutateTypeCause::mtc_BIND; + + CI->replaceAllUsesWith(BindOutInst); + toBeErased.push_back(CI); + } + if (isHPVMCall_attributes(I)) { + Function *F = CI->getParent()->getParent(); + handleHPVMAttributes(F, CI); + toBeErased.push_back(CI); + } + if (isHPVMCall_getNode(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_getNode, &toBeErased); + } + if (isHPVMCall_getParentNode(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_getParentNode, &toBeErased); + } + if (isHPVMCall_barrier(I)) { + 
ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_barrier, &toBeErased); + } + if (isHPVMCall_malloc(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_malloc, &toBeErased); + } + if (isHPVMCall_return(I)) { + DEBUG(errs() << "Function before hpvm return processing\n" + << *I->getParent()->getParent() << "\n"); + // The operands to this call are the values to be returned by the node + Value *ReturnVal = genCodeForReturn(CI); + DEBUG(errs() << *ReturnVal << "\n"); + Type *ReturnType = ReturnVal->getType(); + assert(isa<StructType>(ReturnType) && + "Return type should be a struct type!"); + + assert(((bind == mutateTypeCause::mtc_RETURN) || + (bind == mutateTypeCause::mtc_None)) && + "Both bind_out and hpvm_return detected"); + + if (bind == mutateTypeCause::mtc_None) { + // If this is None, this is the first __hpvm__return + // instruction we have come upon. Place the return type of the + // function in the return type vector + bind = mutateTypeCause::mtc_RETURN; + StructType *ReturnStructTy = cast<StructType>(ReturnType); + for (unsigned i = 0; i < ReturnStructTy->getNumElements(); i++) + FRetTypes.push_back(ReturnStructTy->getElementType(i)); + } else { // bind == mutateTypeCause::mtc_RETURN + // This is not the first __hpvm__return + // instruction we have come upon. 
+ // Check that the return types are the same + assert((ReturnType == FRetTypes[0]) && + "Multiple returns with mismatching types"); + } + + ReturnInst *RetInst = ReturnInst::Create(Ctx, ReturnVal); + DEBUG(errs() << "Found hpvm return call: " << *CI << "\n"); + Instruction *oldReturn = CI->getParent()->getTerminator(); + assert(isa<ReturnInst>(oldReturn) && + "Expecting a return to be the terminator of this BB!"); + DEBUG(errs() << "Found return statement of BB: " << *oldReturn << "\n"); + DEBUG(errs() << "\tSubstitute return with: " << *RetInst << "\n"); + // CI->replaceAllUsesWith(RetInst); + toBeErased.push_back(CI); + ReplaceInstWithInst(oldReturn, RetInst); + DEBUG(errs() << "Function after hpvm return processing\n" + << *I->getParent()->getParent() << "\n"); + } + + if (isHPVMCall_getNodeInstanceID_x(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_getNodeInstanceID_x, + &toBeErased); + } + if (isHPVMCall_getNodeInstanceID_y(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_getNodeInstanceID_y, + &toBeErased); + } + if (isHPVMCall_getNodeInstanceID_z(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_getNodeInstanceID_z, + &toBeErased); + } + if (isHPVMCall_getNumNodeInstances_x(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_getNumNodeInstances_x, + &toBeErased); + } + if (isHPVMCall_getNumNodeInstances_y(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_getNumNodeInstances_y, + &toBeErased); + } + if (isHPVMCall_getNumNodeInstances_z(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_getNumNodeInstances_z, + &toBeErased); + } + if (isHPVMCall_atomic_add(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_atomic_add, &toBeErased); + } + if (isHPVMCall_atomic_sub(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_atomic_sub, &toBeErased); + } + if (isHPVMCall_atomic_xchg(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_atomic_xchg, &toBeErased); + } + if (isHPVMCall_atomic_min(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_atomic_min, 
&toBeErased); + } + if (isHPVMCall_atomic_max(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_atomic_max, &toBeErased); + } + if (isHPVMCall_atomic_and(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_atomic_and, &toBeErased); + } + if (isHPVMCall_atomic_or(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_atomic_or, &toBeErased); + } + if (isHPVMCall_atomic_xor(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_atomic_xor, &toBeErased); + } + if (isHPVMCall_sin(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::sin, &toBeErased); + } + if (isHPVMCall_cos(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::cos, &toBeErased); + } + } + + // Erase the __hpvm__node calls + DEBUG(errs() << "Erase " << toBeErased.size() << " Statements:\n"); + for (auto I : toBeErased) { + DEBUG(errs() << *I << "\n"); + } + while (!toBeErased.empty()) { + Instruction *I = toBeErased.back(); + DEBUG(errs() << "\tErasing " << *I << "\n"); + I->eraseFromParent(); + toBeErased.pop_back(); + } + + if (bind == mutateTypeCause::mtc_BIND || + bind == mutateTypeCause::mtc_RETURN) { + DEBUG(errs() << "Function before fixing return type\n" << *f << "\n"); + // Argument type list. + std::vector<Type *> FArgTypes; + for (Function::const_arg_iterator ai = f->arg_begin(), ae = f->arg_end(); + ai != ae; ++ai) { + FArgTypes.push_back(ai->getType()); + } + + // Find new return type of function + Type *NewReturnTy; + if (bind == mutateTypeCause::mtc_BIND) { + + std::vector<Type *> TyList; + for (unsigned i = 0; i < FRetTypes.size(); i++) + TyList.push_back(FRetTypes[i]); + + NewReturnTy = + StructType::create(f->getContext(), TyList, + Twine("struct.out." 
+ f->getName()).str(), true); + } else { + NewReturnTy = getReturnTypeFromReturnInst(f); + assert(NewReturnTy->isStructTy() && "Expecting a struct type!"); + } + + FunctionType *FTy = + FunctionType::get(NewReturnTy, FArgTypes, f->isVarArg()); + + // Change the function type + Function *newF = cloneFunction(f, FTy, false); + DEBUG(errs() << *newF << "\n"); + + if (bind == mutateTypeCause::mtc_BIND) { + // This is certainly an internal node, and hence just one BB with one + // return terminator instruction. Change return statement + ReturnInst *RI = + cast<ReturnInst>(newF->getEntryBlock().getTerminator()); + ReturnInst *newRI = ReturnInst::Create(newF->getContext(), + UndefValue::get(NewReturnTy)); + ReplaceInstWithInst(RI, newRI); + } + if (bind == mutateTypeCause::mtc_RETURN) { + // Nothing + } + replaceNodeFunctionInIR(*f->getParent(), f, newF); + DEBUG(errs() << "Function after fixing return type\n" << *newF << "\n"); + } + } + return false; // TODO: What does returning "false" mean? +} + +// Generate Code for declaring a constant string [L x i8] and return a pointer +// to the start of it. 
+Value *GenHPVM::getStringPointer(const Twine &S, Instruction *IB,
+                                 const Twine &Name) {
+  // Materialise S as a constant character array; `true` asks
+  // ConstantDataArray::getString to append the terminating NUL.
+  Constant *SConstant =
+      ConstantDataArray::getString(M->getContext(), S.str(), true);
+  // Back the array with a constant global of internal linkage in the module.
+  Value *SGlobal =
+      new GlobalVariable(*M, SConstant->getType(), true,
+                         GlobalValue::InternalLinkage, SConstant, Name);
+  // GEP with indices {0, 0}: address of the first character of the array.
+  // The instruction is inserted before IB.
+  Value *Zero = ConstantInt::get(Type::getInt64Ty(M->getContext()), 0);
+  Value *GEPArgs[] = {Zero, Zero};
+  GetElementPtrInst *SPtr = GetElementPtrInst::Create(
+      nullptr, SGlobal, ArrayRef<Value *>(GEPArgs, 2), Name + "Ptr", IB);
+  return SPtr;
+}
+
+// Create the i8* global "hpvmTimerSet_GenHPVM" (common linkage, null
+// initialiser), call llvm_hpvm_initializeTimerSet before InsertBefore and
+// store the call's result into that global.
+//
+// NOTE(review): TIMER() is defined outside this part of the file; it is
+// assumed to execute its argument only when the timer option is enabled -
+// confirm. Under that assumption the debug prints below must be gated by
+// TIMER() as well: previously they dereferenced TimerSetAddr and SI even
+// when no TIMER() statement had assigned them.
+void GenHPVM::initializeTimerSet(Instruction *InsertBefore) {
+  // Null-initialised so nothing can read an indeterminate pointer.
+  Value *TimerSetAddr = nullptr;
+  StoreInst *SI = nullptr;
+  TIMER(TimerSet = new GlobalVariable(
+            *M, Type::getInt8PtrTy(M->getContext()), false,
+            GlobalValue::CommonLinkage,
+            Constant::getNullValue(Type::getInt8PtrTy(M->getContext())),
+            "hpvmTimerSet_GenHPVM"));
+  TIMER(DEBUG(errs() << "Inserting GV: " << *TimerSet->getType() << *TimerSet
+                     << "\n"));
+  // DEBUG(errs() << "Inserting call to: " << *llvm_hpvm_initializeTimerSet <<
+  // "\n");
+
+  TIMER(TimerSetAddr = CallInst::Create(llvm_hpvm_initializeTimerSet, None, "",
+                                        InsertBefore));
+  TIMER(DEBUG(errs() << "TimerSetAddress = " << *TimerSetAddr << "\n"));
+  TIMER(SI = new StoreInst(TimerSetAddr, TimerSet, InsertBefore));
+  TIMER(DEBUG(errs() << "Store Timer Address in Global variable: " << *SI
+                     << "\n"));
+}
+
+// Insert, before InsertBefore, a call to llvm_hpvm_switchToTimer with the
+// timer-set global and the i32 constant for `timer` as arguments.
+void GenHPVM::switchToTimer(enum hpvm_TimerID timer,
+                            Instruction *InsertBefore) {
+  Value *switchArgs[] = {TimerSet, getTimerID(*M, timer)};
+  TIMER(CallInst::Create(llvm_hpvm_switchToTimer,
+                         ArrayRef<Value *>(switchArgs, 2), "", InsertBefore));
+}
+
+// Insert, before InsertBefore, a call to llvm_hpvm_printTimerSet with the
+// timer-set global and a pointer to the string "GenHPVM_Timer".
+void GenHPVM::printTimerSet(Instruction *InsertBefore) {
+  // Null-initialised: the array below copies TimerName even when TIMER()
+  // did not assign it, which used to read an indeterminate pointer.
+  Value *TimerName = nullptr;
+  TIMER(TimerName = getStringPointer("GenHPVM_Timer", InsertBefore));
+  Value *printArgs[] = {TimerSet, TimerName};
+  TIMER(CallInst::Create(llvm_hpvm_printTimerSet,
+                         ArrayRef<Value *>(printArgs, 2), "", InsertBefore));
+}
+
+// Return `timer` as an i32 constant in M's context.
+static inline ConstantInt *getTimerID(Module &M, enum hpvm_TimerID timer) {
+  return ConstantInt::get(Type::getInt32Ty(M.getContext()), timer);
+}
+
+// Give a node function a struct return type.
+// - If F already returns a struct, F itself is returned unchanged.
+// - Otherwise F must return void (asserted). It is cloned with an empty
+//   struct ("emptyStruct") as return type, every return collected by
+//   cloneFunction is replaced by a return of an undef value of that struct,
+//   replaceNodeFunctionInIR is called with (module, F, clone), and the
+//   clone is returned.
+static Function *transformReturnTypeToStruct(Function *F) {
+  // Currently only works for void return types
+  DEBUG(errs() << "Transforming return type of function to Struct: "
+               << F->getName() << "\n");
+
+  if (isa<StructType>(F->getReturnType())) {
+    DEBUG(errs() << "Return type is already a Struct: " << F->getName() << ": "
+                 << *F->getReturnType() << "\n");
+    return F;
+  }
+
+  assert(F->getReturnType()->isVoidTy() &&
+         "Unhandled case - Only void return type handled\n");
+
+  // The clone keeps F's parameter types unchanged.
+  std::vector<Type *> ArgTypes;
+  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
+       ai != ae; ++ai) {
+    ArgTypes.push_back(ai->getType());
+  }
+
+  StructType *RetTy =
+      StructType::create(F->getContext(), None, "emptyStruct", true);
+  FunctionType *FTy = FunctionType::get(RetTy, ArgTypes, F->isVarArg());
+
+  SmallVector<ReturnInst *, 8> Returns;
+  Function *newF = cloneFunction(F, FTy, false, &Returns);
+  // Replace ret void instruction with ret %RetTy undef
+  for (auto &RI : Returns) {
+    DEBUG(errs() << "Found return inst: " << *RI << "\n");
+    ReturnInst *newRI =
+        ReturnInst::Create(newF->getContext(), UndefValue::get(RetTy));
+    ReplaceInstWithInst(RI, newRI);
+  }
+
+  replaceNodeFunctionInIR(*F->getParent(), F, newF);
+  return newF;
+}
+
+// Return the type returned by the first basic block of F (in block order)
+// whose terminator is a return instruction.
+// - A `ret void` has no return value (getReturnValue() is null), so the
+//   void type is reported for it instead of dereferencing a null pointer;
+//   callers that require a struct type can then reject it with their own
+//   check.
+// - If no block ends in a return, the pass stops with a fatal error rather
+//   than flowing off the end of a non-void function.
+static Type *getReturnTypeFromReturnInst(Function *F) {
+  for (BasicBlock &BB : *F) {
+    ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator());
+    if (!RI)
+      continue;
+    Value *RetVal = RI->getReturnValue();
+    Type *RetTy =
+        RetVal ? RetVal->getType() : Type::getVoidTy(F->getContext());
+    DEBUG(errs() << "Return type value: " << *RetTy << "\n");
+    return RetTy;
+  }
+  report_fatal_error(Twine("getReturnTypeFromReturnInst: function '") +
+                     F->getName() + "' has no return instruction");
+}
+
+char genhpvm::GenHPVM::ID = 0;
+static RegisterPass<genhpvm::GenHPVM>
+    X("genhpvm",
+      "Pass to generate HPVM IR from LLVM IR (with dummy function calls)",
+      false, false);
+
+} // End of namespace genhpvm
diff --git a/hpvm/lib/Transforms/GenVISC/GenVISC.exports b/hpvm/lib/Transforms/GenHPVM/GenHPVM.exports
similarity index 100% rename from hpvm/lib/Transforms/GenVISC/GenVISC.exports rename to hpvm/lib/Transforms/GenHPVM/GenHPVM.exports diff --git a/hpvm/lib/Transforms/GenVISC/LLVMBuild.txt b/hpvm/lib/Transforms/GenHPVM/LLVMBuild.txt similarity index 88% rename from hpvm/lib/Transforms/GenVISC/LLVMBuild.txt rename to hpvm/lib/Transforms/GenHPVM/LLVMBuild.txt index 9266b2c5972984a179beba227946964182761239..94ef73ac07ca5c1ff23a05e404b0ea1f751ef36c 100644 --- a/hpvm/lib/Transforms/GenVISC/LLVMBuild.txt +++ b/hpvm/lib/Transforms/GenHPVM/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Transforms/GenVISC/LLVMBuild.txt -------------------*- Conf -*--===; +;===- ./lib/Transforms/GenHPVM/LLVMBuild.txt -------------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -17,5 +17,5 @@ [component_0] type = Library -name = GenVISC +name = GenHPVM parent = Transforms diff --git a/hpvm/lib/Transforms/GenVISC/GenVISC.cpp b/hpvm/lib/Transforms/GenVISC/GenVISC.cpp deleted file mode 100644 index cc505415396b4a0441d5a5bfe0cf58adc945b9f8..0000000000000000000000000000000000000000 --- a/hpvm/lib/Transforms/GenVISC/GenVISC.cpp +++ /dev/null @@ -1,866 +0,0 @@ -//=== GenVISC.cpp - Implements "Hierarchical Dataflow Graph Builder Pass" ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "genvisc" -#include "GenVISC/GenVISC.h" - -#include "llvm/ADT/Statistic.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/IRReader/IRReader.h" -#include "llvm/IR/DerivedTypes.h" -#include "SupportVISC/VISCHint.h" -#include "SupportVISC/VISCUtils.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Debug.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "SupportVISC/VISCUtils.h" - - -#define TIMER(X) do { if (VISCTimer) { X; } } while (0) - -using namespace llvm; -using namespace viscUtils; - - -// VISC Command line option to use timer or not -static cl::opt<bool> -VISCTimer("visc-timers-gen", cl::desc("Enable GenVISC timer")); - -namespace genvisc { - -// Helper Functions - -static inline ConstantInt* getTimerID(Module&, enum visc_TimerID); -static Function* transformReturnTypeToStruct(Function* F); -static Type* getReturnTypeFromReturnInst(Function* F); - -// Check if the dummy function call is a __visc__node call -#define IS_VISC_CALL(callName) \ - static bool isVISCCall_##callName(Instruction* I) { \ - if(!isa<CallInst>(I)) \ - return false; \ - CallInst* CI = cast<CallInst>(I); \ - return (CI->getCalledValue()->stripPointerCasts()->getName()).equals("__visc__"#callName); \ - } - -static void ReplaceCallWithIntrinsic(Instruction* I, Intrinsic::ID IntrinsicID, std::vector<Instruction*>* Erase) { - // Check if the instruction is Call Instruction - assert(isa<CallInst>(I) && "Expecting CallInst"); - CallInst* CI = cast<CallInst>(I); - DEBUG(errs() << "Found call: " << *CI << "\n"); - - // Find the correct intrinsic call - Module* M = CI->getParent()->getParent()->getParent(); - Function* F; - std::vector<Type*> ArgTypes; - 
std::vector<Value*> args; - if(Intrinsic::isOverloaded(IntrinsicID)) { - // This is an overloaded intrinsic. The types must exactly match. Get the - // argument types - for(unsigned i=0; i < CI->getNumArgOperands(); i++) { - ArgTypes.push_back(CI->getArgOperand(i)->getType()); - args.push_back(CI->getArgOperand(i)); - } - F = Intrinsic::getDeclaration(M, IntrinsicID, ArgTypes); - DEBUG(errs() << *F << "\n"); - } - else { // Non-overloaded intrinsic - F = Intrinsic::getDeclaration(M, IntrinsicID); - FunctionType* FTy = F->getFunctionType(); - DEBUG(errs() << *F << "\n"); - - // Create argument list - assert(CI->getNumArgOperands() == FTy->getNumParams() - && "Number of arguments of call do not match with Intrinsic"); - for(unsigned i=0; i < CI->getNumArgOperands(); i++) { - Value* V = CI->getArgOperand(i); - // Either the type should match or both should be of pointer type - assert((V->getType() == FTy->getParamType(i) || - (V->getType()->isPointerTy() && FTy->getParamType(i)->isPointerTy())) - && "Dummy function call argument does not match with Intrinsic argument!"); - // If the types do not match, then both must be pointer type and pointer - // cast needs to be performed - if(V->getType() != FTy->getParamType(i)) { - V = CastInst::CreatePointerCast(V, FTy->getParamType(i), "", CI); - } - args.push_back(V); - } - } - // Insert call instruction - CallInst* Inst = CallInst::Create(F, args, F->getReturnType()->isVoidTy()? 
"" : CI->getName(), CI); - - DEBUG(errs() << "\tSubstitute with: " << *Inst << "\n"); - - CI->replaceAllUsesWith(Inst); - // If the previous instruction needs to be erased, insert it in the vector - // Erased - if(Erase != NULL) - Erase->push_back(CI); -} - -IS_VISC_CALL(launch) /* Exists but not required */ -IS_VISC_CALL(edge) /* Exists but not required */ -IS_VISC_CALL(createNodeND) -//IS_VISC_CALL(createNode) -//IS_VISC_CALL(createNode1D) -//IS_VISC_CALL(createNode2D) -//IS_VISC_CALL(createNode3D) -IS_VISC_CALL(bindIn) -IS_VISC_CALL(bindOut) -IS_VISC_CALL(push) -IS_VISC_CALL(pop) -IS_VISC_CALL(getNode) -IS_VISC_CALL(getParentNode) -IS_VISC_CALL(barrier) -IS_VISC_CALL(malloc) -IS_VISC_CALL(return) -IS_VISC_CALL(getNodeInstanceID_x) -IS_VISC_CALL(getNodeInstanceID_y) -IS_VISC_CALL(getNodeInstanceID_z) -IS_VISC_CALL(getNumNodeInstances_x) -IS_VISC_CALL(getNumNodeInstances_y) -IS_VISC_CALL(getNumNodeInstances_z) -// Atomics -IS_VISC_CALL(atomic_cmpxchg) -IS_VISC_CALL(atomic_add) -IS_VISC_CALL(atomic_sub) -IS_VISC_CALL(atomic_xchg) -IS_VISC_CALL(atomic_inc) -IS_VISC_CALL(atomic_dec) -IS_VISC_CALL(atomic_min) -IS_VISC_CALL(atomic_max) -IS_VISC_CALL(atomic_umin) -IS_VISC_CALL(atomic_umax) -IS_VISC_CALL(atomic_and) -IS_VISC_CALL(atomic_or) -IS_VISC_CALL(atomic_xor) -// Misc Fn -IS_VISC_CALL(floor) -IS_VISC_CALL(rsqrt) -IS_VISC_CALL(sqrt) -IS_VISC_CALL(sin) -IS_VISC_CALL(cos) - - -IS_VISC_CALL(init) -IS_VISC_CALL(cleanup) -IS_VISC_CALL(wait) -IS_VISC_CALL(trackMemory) -IS_VISC_CALL(untrackMemory) -IS_VISC_CALL(requestMemory) -IS_VISC_CALL(attributes) -IS_VISC_CALL(hint) - -// Return the constant integer represented by value V -static unsigned getNumericValue(Value* V) { - assert(isa<ConstantInt>(V) - && "Value indicating the number of arguments should be a constant integer"); - return cast<ConstantInt>(V)->getZExtValue(); -} - -// Take the __visc__return instruction and generate code for combining the -// values being returned into a struct and returning it. 
-// The first operand is the number of returned values -static Value* genCodeForReturn(CallInst* CI) { - LLVMContext& Ctx = CI->getContext(); - assert(isVISCCall_return(CI) - && "__visc__return instruction expected!"); - - // Parse the dummy function call here - assert(CI->getNumArgOperands() > 0 && "Too few arguments for __visc_return call!\n"); - unsigned numRetVals = getNumericValue(CI->getArgOperand(0)); - - assert(CI->getNumArgOperands()-1 == numRetVals && - "Too few arguments for __visc_return call!\n"); - DEBUG(errs() << "\tNum of return values = " << numRetVals << "\n"); - - std::vector<Type*> ArgTypes; - for(unsigned i=1; i < CI->getNumArgOperands(); i++) { - ArgTypes.push_back(CI->getArgOperand(i)->getType()); - } - Twine outTyName = "struct.out." + CI->getParent()->getParent()->getName(); - StructType* RetTy = StructType::create(Ctx, ArgTypes, outTyName.str(), true); - - InsertValueInst* IV = InsertValueInst::Create(UndefValue::get(RetTy), - CI->getArgOperand(1), - 0, - "returnStruct", - CI); - DEBUG(errs() << "Code generation for return:\n"); - DEBUG(errs() << *IV << "\n"); - - for(unsigned i=2; i < CI->getNumArgOperands(); i++) { - IV = InsertValueInst::Create(IV, - CI->getArgOperand(i), - i-1, - IV->getName(), - CI); - DEBUG(errs() << *IV << "\n"); - } - - return IV; -} - -// Analyse the attribute call for this function. Add the in and out -// attributes to pointer parameters. 
-static void handleVISCAttributes(Function* F, CallInst* CI) { - DEBUG(errs() << "Kernel before adding In/Out VISC attributes:\n" << *F << "\n"); - // Parse the dummy function call here - unsigned offset = 0; - // Find number of In pointers - assert(CI->getNumArgOperands() > offset - && "Too few arguments for __visc__attributes call!"); - unsigned numInPtrs = getNumericValue(CI->getArgOperand(offset)); - DEBUG(errs() << "\tNum of in pointers = " << numInPtrs << "\n"); - - for(unsigned i = offset+1; i< offset+1+numInPtrs; i++) { - Value* V = CI->getArgOperand(i); - if(Argument* arg = dyn_cast<Argument>(V)) { - F->addAttribute(1+arg->getArgNo(), Attribute::In); - } - else { - errs() << "Invalid argument to __visc__attribute: " << *V << "\n"; - llvm_unreachable("Only pointer arguments can be passed to __visc__attributes call"); - } - } - // Find number of Out Pointers - offset += 1 + numInPtrs; - assert(CI->getNumArgOperands() > offset - && "Too few arguments for __visc__attributes call!"); - unsigned numOutPtrs = getNumericValue(CI->getOperand(offset)); - DEBUG(errs() << "\tNum of out Pointers = " << numOutPtrs << "\n"); - for(unsigned i = offset+1; i< offset+1+numOutPtrs; i++) { - Value* V = CI->getArgOperand(i); - if(Argument* arg = dyn_cast<Argument>(V)) { - F->addAttribute(1+arg->getArgNo(), Attribute::Out); - } - else { - errs() << "Invalid argument to __visc__attribute: " << *V << "\n"; - llvm_unreachable("Only pointer arguments can be passed to __visc__attributes call"); - } - } - DEBUG(errs() << "Kernel after adding In/Out VISC attributes:\n" << *F << "\n"); -} - -// Public Functions of GenVISC pass -bool GenVISC::runOnModule(Module &M) { - errs() << "\nGENVISC PASS\n"; - this->M = &M; - - // Load Runtime API Module - SMDiagnostic Err; - - char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT"); - assert(LLVM_SRC_ROOT != NULL && - "Define LLVM_SRC_ROOT environment variable!"); - - Twine llvmSrcRoot = LLVM_SRC_ROOT; - Twine runtimeAPI = llvmSrcRoot + 
"/../build/tools/hpvm/projects/visc-rt/visc-rt.bc"; - errs() << llvmSrcRoot << "\n"; - - std::unique_ptr<Module> runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext()); - - if(runtimeModule == NULL) { - DEBUG(errs() << Err.getMessage() << " " << runtimeAPI << "\n"); - assert(false && "couldn't parse runtime"); - } - else - DEBUG(errs() << "Successfully loaded visc-rt API module\n"); - - llvm_visc_initializeTimerSet = M.getOrInsertFunction("llvm_visc_initializeTimerSet", - runtimeModule->getFunction("llvm_visc_initializeTimerSet")->getFunctionType()); - //DEBUG(errs() << *llvm_visc_initializeTimerSet); - - llvm_visc_switchToTimer = M.getOrInsertFunction("llvm_visc_switchToTimer", - runtimeModule->getFunction("llvm_visc_switchToTimer")->getFunctionType()); - // DEBUG(errs() << *llvm_visc_switchToTimer); - - llvm_visc_printTimerSet = M.getOrInsertFunction("llvm_visc_printTimerSet", - runtimeModule->getFunction("llvm_visc_printTimerSet")->getFunctionType()); - //DEBUG(errs() << *llvm_visc_printTimerSet); - - // Insert init context in main - DEBUG(errs() << "Locate __visc__init()\n"); - Function* VI = M.getFunction("__visc__init"); - assert(VI->getNumUses() == 1 && "__visc__init should only be used once"); - Instruction* I = cast<Instruction>(*VI->user_begin()); - - DEBUG(errs() << "Initialize Timer Set\n"); - initializeTimerSet(I); - switchToTimer(visc_TimerID_NONE, I); - - // Insert print instruction at visc exit - DEBUG(errs() << "Locate __visc__cleanup()\n"); - Function* VC = M.getFunction("__visc__cleanup"); - assert(VC->getNumUses() == 1 && "__visc__cleanup should only be used once"); - I = cast<Instruction>(*VC->user_begin()); - printTimerSet(I); - - - DEBUG(errs() << "-------- Searching for launch sites ----------\n"); - - std::vector<Instruction*> toBeErased; - std::vector<Function*> functions; - - for (auto &F : M) - functions.push_back(&F); - - // Iterate over all functions in the module - for (Function *f : functions) { - DEBUG(errs() << 
"Function: " << f->getName() << "\n"); - - // List with the required additions in the function's return type - std::vector<Type*> FRetTypes; - - enum mutateTypeCause { - mtc_None, - mtc_BIND, - mtc_RETURN, - mtc_NUM_CAUSES - } bind; - bind = mutateTypeCause::mtc_None; - - // Iterate over all the instructions in this function - for (inst_iterator i = inst_begin(f), e = inst_end(f); i != e ; ++i) { - Instruction* I = &*i; // Grab pointer to Instruction - // If not a call instruction, move to next instruction - if(!isa<CallInst>(I)) - continue; - - CallInst* CI = cast<CallInst>(I); - LLVMContext& Ctx = CI->getContext(); - - if(isVISCCall_init(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_init, &toBeErased); - } - if(isVISCCall_cleanup(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_cleanup, &toBeErased); - } - if(isVISCCall_wait(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_wait, &toBeErased); - } - if(isVISCCall_trackMemory(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_trackMemory, &toBeErased); - } - if(isVISCCall_untrackMemory(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_untrackMemory, &toBeErased); - } - if(isVISCCall_requestMemory(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_requestMemory, &toBeErased); - } - if(isVISCCall_hint(I)) { - assert(isa<ConstantInt>(CI->getArgOperand(0)) - && "Argument to hint must be constant integer!"); - ConstantInt* hint = cast<ConstantInt>(CI->getArgOperand(0)); - - visc::Target t = (visc::Target) hint->getZExtValue(); - addHint(CI->getParent()->getParent(), t); - DEBUG(errs() << "Found visc hint call: " << *CI << "\n"); - toBeErased.push_back(CI); - } - if(isVISCCall_launch(I)) { - Function* LaunchF = Intrinsic::getDeclaration(&M, Intrinsic::visc_launch); - DEBUG(errs() << *LaunchF << "\n"); - // Get i8* cast to function pointer - Function* graphFunc = cast<Function>(CI->getArgOperand(1)); - graphFunc = transformReturnTypeToStruct(graphFunc); - Constant* F = 
ConstantExpr::getPointerCast(graphFunc, Type::getInt8PtrTy(Ctx)); - assert(F && "Function invoked by VISC launch has to be define and constant."); - - ConstantInt* Op = cast<ConstantInt>(CI->getArgOperand(0)); - assert(Op && "VISC launch's streaming argument is a constant value."); - Value* isStreaming = Op->isZero()? ConstantInt::getFalse(Ctx) - : ConstantInt::getTrue(Ctx); - - auto *ArgTy = dyn_cast<PointerType>(CI->getArgOperand(2)->getType()); - assert(ArgTy && "VISC launch argument should be pointer type."); - Value *Arg = CI->getArgOperand(2); - if(!ArgTy->getElementType()->isIntegerTy(8)) - Arg = BitCastInst::CreatePointerCast(CI->getArgOperand(2), Type::getInt8PtrTy(Ctx), "", CI); - Value* LaunchArgs[] = {F, Arg, isStreaming}; - CallInst* LaunchInst = CallInst::Create(LaunchF, - ArrayRef<Value*>(LaunchArgs, 3), - "graphID", CI); - DEBUG(errs() << "Found visc launch call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *LaunchInst << "\n"); - CI->replaceAllUsesWith(LaunchInst); - toBeErased.push_back(CI); - } - if(isVISCCall_push(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_push, &toBeErased); - } - if(isVISCCall_pop(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_pop, &toBeErased); - } - if(isVISCCall_createNodeND(I)) { - assert(CI->getNumArgOperands() > 0 && - "Too few arguments for __visc__createNodeND call"); - unsigned numDims = getNumericValue(CI->getArgOperand(0)); - // We need as meny dimension argments are there are dimensions - assert(CI->getNumArgOperands()-2 == numDims && - "Too few arguments for __visc_createNodeND call!\n"); - - Function* CreateNodeF; - switch (numDims) { - case 0: - CreateNodeF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createNode); - break; - case 1: - CreateNodeF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createNode1D); - break; - case 2: - CreateNodeF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createNode2D); - break; - case 3: - CreateNodeF = Intrinsic::getDeclaration(&M, 
Intrinsic::visc_createNode3D); - break; - default: - llvm_unreachable("Unsupported number of dimensions\n"); - break; - } - DEBUG(errs() << *CreateNodeF << "\n"); - DEBUG(errs() << *I << "\n"); - DEBUG(errs() << "in " << I->getParent()->getParent()->getName() << "\n"); - - // Get i8* cast to function pointer - Function* graphFunc = cast<Function>(CI->getArgOperand(1)); - graphFunc = transformReturnTypeToStruct(graphFunc); - Constant* F = ConstantExpr::getPointerCast(graphFunc, Type::getInt8PtrTy(Ctx)); - - CallInst* CreateNodeInst; - switch (numDims) { - case 0: - CreateNodeInst = CallInst::Create(CreateNodeF, - ArrayRef<Value*>(F), - graphFunc->getName()+".node", CI); - break; - case 1: - { - assert((CI->getArgOperand(2)->getType() == Type::getInt64Ty(Ctx)) && - "CreateNodeND dimension argument, 2, expected to be i64\n"); - Value* CreateNodeArgs[] = {F, CI->getArgOperand(2)}; - CreateNodeInst = CallInst::Create(CreateNodeF, - ArrayRef<Value*>(CreateNodeArgs, 2), - graphFunc->getName()+".node", CI); - } - break; - case 2: - { - assert((CI->getArgOperand(2)->getType() == Type::getInt64Ty(Ctx)) && - "CreateNodeND dimension argument, 2, expected to be i64\n"); - assert((CI->getArgOperand(3)->getType() == Type::getInt64Ty(Ctx)) && - "CreateNodeND dimension argument, 3, expected to be i64\n"); - Value* CreateNodeArgs[] = {F, - CI->getArgOperand(2), - CI->getArgOperand(3)}; - CreateNodeInst = CallInst::Create(CreateNodeF, - ArrayRef<Value*>(CreateNodeArgs, 3), - graphFunc->getName()+".node", CI); - } - break; - case 3: - { - assert((CI->getArgOperand(2)->getType() == Type::getInt64Ty(Ctx)) && - "CreateNodeND dimension argument, 2, expected to be i64\n"); - assert((CI->getArgOperand(3)->getType() == Type::getInt64Ty(Ctx)) && - "CreateNodeND dimension argument, 3, expected to be i64\n"); - assert((CI->getArgOperand(4)->getType() == Type::getInt64Ty(Ctx)) && - "CreateNodeND dimension argument, 4, expected to be i64\n"); - Value* CreateNodeArgs[] = {F, - 
CI->getArgOperand(2), - CI->getArgOperand(3), - CI->getArgOperand(4)}; - CreateNodeInst = CallInst::Create(CreateNodeF, - ArrayRef<Value*>(CreateNodeArgs, 4), - graphFunc->getName()+".node", CI); - } - break; - default: - llvm_unreachable("Impossible path: number of dimensions is 0, 1, 2, 3\n"); - break; - } - - DEBUG(errs() << "Found visc createNode call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *CreateNodeInst << "\n"); - CI->replaceAllUsesWith(CreateNodeInst); - toBeErased.push_back(CI); - } - - if(isVISCCall_edge(I)) { - Function* EdgeF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createEdge); - DEBUG(errs() << *EdgeF << "\n"); - ConstantInt* Op = cast<ConstantInt>(CI->getArgOperand(5)); - ConstantInt* EdgeTypeOp = cast<ConstantInt>(CI->getArgOperand(2)); - assert(Op && EdgeTypeOp && "Arguments of CreateEdge are not constant integers."); - Value* isStreaming = Op->isZero()? ConstantInt::getFalse(Ctx) - : ConstantInt::getTrue(Ctx); - Value* isAllToAll = EdgeTypeOp->isZero()? ConstantInt::getFalse(Ctx) - : ConstantInt::getTrue(Ctx); - Value* EdgeArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1), - isAllToAll, CI->getArgOperand(3), CI->getArgOperand(4), - isStreaming - }; - CallInst* EdgeInst = CallInst::Create(EdgeF, - ArrayRef<Value*>(EdgeArgs, 6), - "output", CI); - DEBUG(errs() << "Found visc edge call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *EdgeInst << "\n"); - CI->replaceAllUsesWith(EdgeInst); - toBeErased.push_back(CI); - } - if(isVISCCall_bindIn(I)) { - Function* BindInF = Intrinsic::getDeclaration(&M, Intrinsic::visc_bind_input); - DEBUG(errs() << *BindInF << "\n"); - // Check if this is a streaming bind or not - ConstantInt* Op = cast<ConstantInt>(CI->getArgOperand(3)); - assert(Op && "Streaming argument for bind in intrinsic should be a constant integer."); - Value* isStreaming = Op->isZero()? 
ConstantInt::getFalse(Ctx) - : ConstantInt::getTrue(Ctx); - Value* BindInArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), isStreaming - }; - CallInst* BindInInst = CallInst::Create(BindInF, - ArrayRef<Value*>(BindInArgs, 4), - "", CI); - DEBUG(errs() << "Found visc bindIn call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *BindInInst << "\n"); - CI->replaceAllUsesWith(BindInInst); - toBeErased.push_back(CI); - } - if(isVISCCall_bindOut(I)) { - Function* BindOutF = Intrinsic::getDeclaration(&M, Intrinsic::visc_bind_output); - DEBUG(errs() << *BindOutF << "\n"); - // Check if this is a streaming bind or not - ConstantInt* Op = cast<ConstantInt>(CI->getArgOperand(3)); - assert(Op && "Streaming argument for bind out intrinsic should be a constant integer."); - Value* isStreaming = Op->isZero()? ConstantInt::getFalse(Ctx) - : ConstantInt::getTrue(Ctx); - Value* BindOutArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), isStreaming - }; - CallInst* BindOutInst = CallInst::Create(BindOutF, - ArrayRef<Value*>(BindOutArgs, 4), - "", CI); - DEBUG(errs() << "Found visc bindOut call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *BindOutInst << "\n"); - - DEBUG(errs() << "Fixing the return type of the function\n"); - // FIXME: What if the child node function has not been visited already. - // i.e., it's return type has not been fixed. 
- Function* F = I->getParent()->getParent(); - DEBUG(errs() << F->getName() << "\n";); - IntrinsicInst* NodeIntrinsic = cast<IntrinsicInst>(CI->getArgOperand(0)); - assert(NodeIntrinsic && "Instruction value in bind out is not a create node intrinsic."); - DEBUG(errs() << "Node intrinsic: " << *NodeIntrinsic << "\n"); - assert((NodeIntrinsic->getIntrinsicID() == Intrinsic::visc_createNode || - NodeIntrinsic->getIntrinsicID() == Intrinsic::visc_createNode1D || - NodeIntrinsic->getIntrinsicID() == Intrinsic::visc_createNode2D || - NodeIntrinsic->getIntrinsicID() == Intrinsic::visc_createNode3D) && - "Instruction value in bind out is not a create node intrinsic."); - Function* ChildF = cast<Function>(NodeIntrinsic->getArgOperand(0)->stripPointerCasts()); - DEBUG(errs() << ChildF->getName() << "\n";); - int srcpos = cast<ConstantInt>(CI->getArgOperand(1))->getSExtValue(); - int destpos = cast<ConstantInt>(CI->getArgOperand(2))->getSExtValue(); - StructType* ChildReturnTy = cast<StructType>(ChildF->getReturnType()); - - Type* ReturnType = F->getReturnType(); - DEBUG(errs() << *ReturnType << "\n";); - assert((ReturnType->isVoidTy() || isa<StructType>(ReturnType)) - && "Return type should either be a struct or void type!"); - - FRetTypes.insert(FRetTypes.begin()+destpos, ChildReturnTy->getElementType(srcpos)); - assert(((bind == mutateTypeCause::mtc_BIND) || - (bind == mutateTypeCause::mtc_None)) && - "Both bind_out and visc_return detected"); - bind = mutateTypeCause::mtc_BIND; - - CI->replaceAllUsesWith(BindOutInst); - toBeErased.push_back(CI); - } - if(isVISCCall_attributes(I)) { - Function* F = CI->getParent()->getParent(); - handleVISCAttributes(F, CI); - toBeErased.push_back(CI); - } - if (isVISCCall_getNode(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNode, &toBeErased); - } - if (isVISCCall_getParentNode(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_getParentNode, &toBeErased); - } - if (isVISCCall_barrier(I)) { - ReplaceCallWithIntrinsic(I, 
Intrinsic::visc_barrier, &toBeErased); - } - if (isVISCCall_malloc(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_malloc, &toBeErased); - } - if (isVISCCall_return(I)) { - DEBUG(errs() << "Function before visc return processing\n" << *I->getParent()->getParent() << "\n"); - // The operands to this call are the values to be returned by the node - Value* ReturnVal = genCodeForReturn(CI); - DEBUG(errs() << *ReturnVal << "\n"); - Type* ReturnType = ReturnVal->getType(); - assert(isa<StructType>(ReturnType) - && "Return type should be a struct type!"); - - assert(((bind == mutateTypeCause::mtc_RETURN) || - (bind == mutateTypeCause::mtc_None)) && - "Both bind_out and visc_return detected"); - - if (bind == mutateTypeCause::mtc_None) { - // If this is None, this is the first __visc__return - // instruction we have come upon. Place the return type of the - // function in the return type vector - bind = mutateTypeCause::mtc_RETURN; - StructType* ReturnStructTy = cast<StructType>(ReturnType); - for (unsigned i = 0; i < ReturnStructTy->getNumElements(); i++) - FRetTypes.push_back(ReturnStructTy->getElementType(i)); - } else { // bind == mutateTypeCause::mtc_RETURN - // This is not the first __visc__return - // instruction we have come upon. 
- // Check that the return types are the same - assert((ReturnType == FRetTypes[0]) - && "Multiple returns with mismatching types"); - } - - ReturnInst* RetInst = ReturnInst::Create(Ctx, ReturnVal); - DEBUG(errs() << "Found visc return call: " << *CI << "\n"); - Instruction* oldReturn = CI->getParent()->getTerminator(); - assert(isa<ReturnInst>(oldReturn) - && "Expecting a return to be the terminator of this BB!"); - DEBUG(errs() << "Found return statement of BB: " << *oldReturn << "\n"); - DEBUG(errs() << "\tSubstitute return with: " << *RetInst << "\n"); - //CI->replaceAllUsesWith(RetInst); - toBeErased.push_back(CI); - ReplaceInstWithInst(oldReturn, RetInst); - DEBUG(errs() << "Function after visc return processing\n" << *I->getParent()->getParent() << "\n"); - } - - if (isVISCCall_getNodeInstanceID_x(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNodeInstanceID_x, &toBeErased); - } - if (isVISCCall_getNodeInstanceID_y(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNodeInstanceID_y, &toBeErased); - } - if (isVISCCall_getNodeInstanceID_z(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNodeInstanceID_z, &toBeErased); - } - if (isVISCCall_getNumNodeInstances_x(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNumNodeInstances_x, &toBeErased); - } - if (isVISCCall_getNumNodeInstances_y(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNumNodeInstances_y, &toBeErased); - } - if (isVISCCall_getNumNodeInstances_z(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNumNodeInstances_z, &toBeErased); - } - if (isVISCCall_atomic_add(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_add, &toBeErased); - } - if (isVISCCall_atomic_sub(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_sub, &toBeErased); - } - if (isVISCCall_atomic_xchg(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_xchg, &toBeErased); - } - if (isVISCCall_atomic_min(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_min, &toBeErased); - 
} - if (isVISCCall_atomic_max(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_max, &toBeErased); - } - if (isVISCCall_atomic_and(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_and, &toBeErased); - } - if (isVISCCall_atomic_or(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_or, &toBeErased); - } - if (isVISCCall_atomic_xor(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_xor, &toBeErased); - } - if (isVISCCall_sin(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::sin, &toBeErased); - } - if (isVISCCall_cos(I)) { - ReplaceCallWithIntrinsic(I, Intrinsic::cos, &toBeErased); - } - } - - // Erase the __visc__node calls - DEBUG(errs() << "Erase " << toBeErased.size() << " Statements:\n"); - for(auto I: toBeErased) { - DEBUG(errs() << *I << "\n"); - } - while(!toBeErased.empty()) { - Instruction* I = toBeErased.back(); - DEBUG(errs() << "\tErasing " << *I << "\n"); - I->eraseFromParent(); - toBeErased.pop_back(); - } - - if(bind == mutateTypeCause::mtc_BIND || bind == mutateTypeCause::mtc_RETURN) { - DEBUG(errs() << "Function before fixing return type\n" << *f << "\n"); - // Argument type list. 
- std::vector<Type*> FArgTypes; - for(Function::const_arg_iterator ai = f->arg_begin(), ae = f->arg_end(); - ai != ae; ++ai) { - FArgTypes.push_back(ai->getType()); - } - - // Find new return type of function - Type* NewReturnTy; - if(bind == mutateTypeCause::mtc_BIND) { - - std::vector<Type*> TyList; - for (unsigned i = 0; i < FRetTypes.size(); i++) - TyList.push_back(FRetTypes[i]); - - NewReturnTy = StructType::create(f->getContext(), TyList, Twine("struct.out."+f->getName()).str(), true); - } - else { - NewReturnTy = getReturnTypeFromReturnInst(f); - assert(NewReturnTy->isStructTy() && "Expecting a struct type!"); - } - - FunctionType* FTy = FunctionType::get(NewReturnTy, FArgTypes, f->isVarArg()); - - // Change the function type - Function* newF = cloneFunction(f, FTy, false); - DEBUG(errs() << *newF << "\n"); - - if (bind == mutateTypeCause::mtc_BIND) { - // This is certainly an internal node, and hence just one BB with one - // return terminator instruction. Change return statement - ReturnInst* RI = cast<ReturnInst>(newF->getEntryBlock().getTerminator()); - ReturnInst* newRI = ReturnInst::Create(newF->getContext(), UndefValue::get(NewReturnTy)); - ReplaceInstWithInst(RI, newRI); - } - if (bind == mutateTypeCause::mtc_RETURN) { - // Nothing - } - replaceNodeFunctionInIR(*f->getParent(), f, newF); - DEBUG(errs() << "Function after fixing return type\n" << *newF << "\n"); - } - - - } - return false; //TODO: What does returning "false" mean? -} - -// Generate Code for declaring a constant string [L x i8] and return a pointer -// to the start of it. 
-Value* GenVISC::getStringPointer(const Twine& S, Instruction* IB, const Twine& Name) { - Constant* SConstant = ConstantDataArray::getString(M->getContext(), S.str(), true); - Value* SGlobal = new GlobalVariable(*M, SConstant->getType(), true, - GlobalValue::InternalLinkage, SConstant, Name); - Value* Zero = ConstantInt::get(Type::getInt64Ty(M->getContext()), 0); - Value* GEPArgs[] = {Zero, Zero}; - GetElementPtrInst* SPtr = GetElementPtrInst::Create(nullptr, SGlobal, - ArrayRef<Value*>(GEPArgs, 2), Name+"Ptr", IB); - return SPtr; -} - -void GenVISC::initializeTimerSet(Instruction* InsertBefore) { - Value* TimerSetAddr; - StoreInst* SI; - TIMER(TimerSet = new GlobalVariable(*M, - Type::getInt8PtrTy(M->getContext()), - false, - GlobalValue::CommonLinkage, - Constant::getNullValue(Type::getInt8PtrTy(M->getContext())), - "viscTimerSet_GenVISC")); - DEBUG(errs() << "Inserting GV: " << *TimerSet->getType() << *TimerSet << "\n"); - //DEBUG(errs() << "Inserting call to: " << *llvm_visc_initializeTimerSet << "\n"); - - TIMER(TimerSetAddr = CallInst::Create(llvm_visc_initializeTimerSet, - None, - "", - InsertBefore)); - DEBUG(errs() << "TimerSetAddress = " << *TimerSetAddr << "\n"); - TIMER(SI = new StoreInst(TimerSetAddr, TimerSet, InsertBefore)); - DEBUG(errs() << "Store Timer Address in Global variable: " << *SI << "\n"); -} - -void GenVISC::switchToTimer(enum visc_TimerID timer, Instruction* InsertBefore) { - Value* switchArgs[] = {TimerSet, getTimerID(*M, timer)}; - TIMER(CallInst::Create(llvm_visc_switchToTimer, - ArrayRef<Value*>(switchArgs, 2), - "", - InsertBefore)); -} - -void GenVISC::printTimerSet(Instruction* InsertBefore) { - Value* TimerName; - TIMER(TimerName = getStringPointer("GenVISC_Timer", InsertBefore)); - Value* printArgs[] = {TimerSet, TimerName}; - TIMER(CallInst::Create(llvm_visc_printTimerSet, - ArrayRef<Value*>(printArgs, 2), - "", - InsertBefore)); -} - -static inline ConstantInt* getTimerID(Module& M, enum visc_TimerID timer) { - return 
ConstantInt::get(Type::getInt32Ty(M.getContext()), timer); -} - -static Function* transformReturnTypeToStruct(Function* F) { - // Currently only works for void return types - DEBUG(errs() << "Transforming return type of function to Struct: " << F->getName() << "\n"); - - if (isa<StructType>(F->getReturnType())) { - DEBUG(errs() << "Return type is already a Struct: " << F->getName() << ": " << *F->getReturnType() << "\n"); - return F; - } - - assert(F->getReturnType()->isVoidTy() && "Unhandled case - Only void return type handled\n"); - - // Create the argument type list with added argument types - std::vector<Type*> ArgTypes; - for(Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); - ai != ae; ++ai) { - ArgTypes.push_back(ai->getType()); - } - - StructType* RetTy = StructType::create(F->getContext(), None, "emptyStruct", true); - FunctionType* FTy = FunctionType::get(RetTy, ArgTypes, F->isVarArg()); - - SmallVector<ReturnInst*, 8> Returns; - Function* newF = cloneFunction(F, FTy, false, &Returns); - // Replace ret void instruction with ret %RetTy undef - for(auto &RI: Returns) { - DEBUG(errs() << "Found return inst: "<< *RI << "\n"); - ReturnInst* newRI = ReturnInst::Create(newF->getContext(), UndefValue::get(RetTy)); - ReplaceInstWithInst(RI, newRI); - } - - replaceNodeFunctionInIR(*F->getParent(), F, newF); - return newF; -} - -static Type* getReturnTypeFromReturnInst(Function* F) { - for(BasicBlock &BB: *F) { - if(ReturnInst* RI = dyn_cast<ReturnInst>(BB.getTerminator())) { - DEBUG(errs() << "Return type value: " << *RI->getReturnValue()->getType() << "\n"); - return RI->getReturnValue()->getType(); - } - } -} - - -char genvisc::GenVISC::ID = 0; -static RegisterPass<genvisc::GenVISC> X("genvisc", "Pass to generate VISC IR from LLVM IR (with dummy function calls)", false, false); - -} // End of namespace genvisc - - diff --git a/hpvm/lib/Transforms/LocalMem/LocalMem.cpp b/hpvm/lib/Transforms/LocalMem/LocalMem.cpp index 
7bd66b62c6c8cda589fe3e6c1e3711893aceaffb..fc33ebee71123d89c5f931901dd213c82a401941 100644 --- a/hpvm/lib/Transforms/LocalMem/LocalMem.cpp +++ b/hpvm/lib/Transforms/LocalMem/LocalMem.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "LocalMem" -#include "SupportVISC/DFG2LLVM.h" +#include "SupportHPVM/DFG2LLVM.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/InstIterator.h" @@ -134,7 +134,7 @@ void AT_OCL::codeGen(DFLeafNode *N) { // Return pointer to property if this leaf node matches the conditions for being // an allocation node. Conditions // 1. No incoming memory pointer. No in/out attribute on a pointer argument -// 2. Uses visc malloc intrinsic to allocate memory +// 2. Uses hpvm malloc intrinsic to allocate memory // 3. Sends it out // 2. (TODO:) Whether the allocated pointer escapes the parent node AllocationNodeProperty *isAllocationNode(DFLeafNode *N) { @@ -148,18 +148,18 @@ AllocationNodeProperty *isAllocationNode(DFLeafNode *N) { Function *F = N->getFuncPointer(); - // Allocation node must use visc malloc intrinsic - bool usesVISCMalloc = false; + // Allocation node must use hpvm malloc intrinsic + bool usesHPVMMalloc = false; for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; i++) { Instruction *I = &*i; if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - if (II->getIntrinsicID() == Intrinsic::visc_malloc) { - usesVISCMalloc = true; + if (II->getIntrinsicID() == Intrinsic::hpvm_malloc) { + usesHPVMMalloc = true; break; } } } - if (!usesVISCMalloc) + if (!usesHPVMMalloc) return NULL; // TODO: Check if allocated pointer leaves parent node @@ -197,20 +197,20 @@ AllocationNodeProperty *isAllocationNode(DFLeafNode *N) { assert(OutValues[i]->getType()->isPointerTy() && "Expected outgoing edge to be of pointer type"); if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(OutValues[i])) { - if (II->getIntrinsicID() == Intrinsic::visc_malloc) { + if 
(II->getIntrinsicID() == Intrinsic::hpvm_malloc) { // Sanity check: Size passed to malloc intrinsic is same as the value // going into the next outgoing edge - DEBUG(errs() << "Visc malloc size: " << *II->getArgOperand(0) << "\n"); + DEBUG(errs() << "HPVM malloc size: " << *II->getArgOperand(0) << "\n"); DEBUG(errs() << "Out edge value: " << *OutValues[i + 1] << "\n"); assert(II->getArgOperand(0) == OutValues[i + 1] && - "Sanity Check Failed: VISC Malloc size argument != next " + "Sanity Check Failed: HPVM Malloc size argument != next " "outgoing edge"); ANP->insertAllocation(N->getOutDFEdgeAt(i), II->getArgOperand(0)); i = i + 2; continue; } } - llvm_unreachable("Expecting visc malloc intrinsic instruction!"); + llvm_unreachable("Expecting hpvm malloc intrinsic instruction!"); } return ANP; } diff --git a/hpvm/llvm_installer/llvm_installer.sh b/hpvm/llvm_installer/llvm_installer.sh index d7fcda4ac4de8c129e47cfce65264097e040d228..e072d042b79a1a3caf8003794a89b5cee2dca67a 100755 --- a/hpvm/llvm_installer/llvm_installer.sh +++ b/hpvm/llvm_installer/llvm_installer.sh @@ -179,10 +179,10 @@ echo make -j$NUM_THREADS make -j$NUM_THREADS #make install -#echo Building HPVM runtime -#HPVM_RT_DIR=$HPVM_DIR/projects/visc-rt -#cd $HPVM_RT_DIR -#make +# echo Building HPVM runtime +# HPVM_RT_DIR=$HPVM_DIR/projects/hpvm-rt +# cd $HPVM_RT_DIR +# make #cp -r $CURRENT_DIR/projects $HPVM_DIR/ #make -j$NUM_THREADS diff --git a/hpvm/llvm_patches/apply_patch.sh b/hpvm/llvm_patches/apply_patch.sh index ea86575207a4aa7b4ca138b604f7423943924b22..289e5c11e319aa16262952d2d079f986c2e987b8 100644 --- a/hpvm/llvm_patches/apply_patch.sh +++ b/hpvm/llvm_patches/apply_patch.sh @@ -1,7 +1,7 @@ #!/bin/sh ### File Copies -cp include/IR/IntrinsicsVISC.td ${LLVM_SRC_ROOT}/include/llvm/IR/IntrinsicsVISC.td +cp include/IR/IntrinsicsHPVM.td ${LLVM_SRC_ROOT}/include/llvm/IR/IntrinsicsHPVM.td ## Header File Patches diff --git a/hpvm/llvm_patches/include/IR/Attributes.td 
b/hpvm/llvm_patches/include/IR/Attributes.td index b644cdb30bbd590a8b8c238bfde15e4b451e8ea3..c6ff8ef3c6c962f5444d718ff5a7e16ce392a522 100644 --- a/hpvm/llvm_patches/include/IR/Attributes.td +++ b/hpvm/llvm_patches/include/IR/Attributes.td @@ -151,7 +151,7 @@ def ShadowCallStack : EnumAttr<"shadowcallstack">; /// Sign extended before/after call. def SExt : EnumAttr<"signext">; -/// VISC Attributes +/// HPVM Attributes /// Pointer to read only memory def In : EnumAttr<"in">; diff --git a/hpvm/llvm_patches/include/IR/Intrinsics.td b/hpvm/llvm_patches/include/IR/Intrinsics.td index 2f79964a2e381c6d4ec22a5bc3c80a9d411f9fb0..2e3f34eb1a8408371a0b516089dd970adfe9223c 100644 --- a/hpvm/llvm_patches/include/IR/Intrinsics.td +++ b/hpvm/llvm_patches/include/IR/Intrinsics.td @@ -1249,4 +1249,4 @@ include "llvm/IR/IntrinsicsBPF.td" include "llvm/IR/IntrinsicsSystemZ.td" include "llvm/IR/IntrinsicsWebAssembly.td" include "llvm/IR/IntrinsicsRISCV.td" -include "llvm/IR/IntrinsicsVISC.td" +include "llvm/IR/IntrinsicsHPVM.td" diff --git a/hpvm/llvm_patches/include/IR/IntrinsicsHPVM.td b/hpvm/llvm_patches/include/IR/IntrinsicsHPVM.td new file mode 100644 index 0000000000000000000000000000000000000000..410e9c8d3345e67df9614e0d518e5e596a4368e1 --- /dev/null +++ b/hpvm/llvm_patches/include/IR/IntrinsicsHPVM.td @@ -0,0 +1,208 @@ +//===- IntrinsicsHPVM.td - Defines HPVM intrinsics ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the HPVM-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "hpvm" in { + /* All intrinsics start with "llvm.hpvm." 
+ * As we do not want the compiler to mess with these intrinsics, we assume + * worst memory behavior for all these intrinsics. + */ + + /* Initialization intrinsic - + * void llvm.hpvm.init(); + */ + def int_hpvm_init : Intrinsic<[], [], []>; + + /* Launch intrinsic - with streaming argument + * i8* llvm.hpvm.launch(i8*, ArgList*, i1); + */ + def int_hpvm_launch : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, + llvm_ptr_ty, llvm_i1_ty], []>; + + /* Push intrinsic - push data on streaming pipeline + * void llvm.hpvm.push(i8*, ArgList*); + */ + def int_hpvm_push : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], []>; + + /* Pop intrinsic - pop data from streaming pipeline + * i8* llvm.hpvm.pop(i8*); + */ + def int_hpvm_pop : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; + + /* Cleanup intrinsic - + * void llvm.hpvm.cleanup(); + */ + def int_hpvm_cleanup : Intrinsic<[], [], []>; + + /* Wait intrinsic - + * void llvm.hpvm.wait(graphID*); + */ + def int_hpvm_wait : Intrinsic<[], [llvm_ptr_ty], []>; + + /* Track memory intrinsic - + * void llvm.hpvm.trackMemory(i8*, i64); + */ + def int_hpvm_trackMemory : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>; + + /* Untrack memory intrinsic - + * void llvm.hpvm.untrackMemory(i8*); + */ + def int_hpvm_untrackMemory : Intrinsic<[], [llvm_ptr_ty], []>; + + /* Request memory intrinsic - + * void llvm.hpvm.requestMemory(i8*, i64); + */ + def int_hpvm_requestMemory : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>; + + /* Create Node intrinsic - + * i8* llvm.hpvm.createNode(function*); + */ + def int_hpvm_createNode : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; + + /* Create Node 1D array intrinsic - + * i8* llvm.hpvm.createNode1D(function*, i64); + */ + def int_hpvm_createNode1D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, + llvm_i64_ty], []>; + + /* Create Node 2D array intrinsic - + * i8* llvm.hpvm.createNode2D(function*, i64, i64); + */ + def int_hpvm_createNode2D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, + llvm_i64_ty, llvm_i64_ty], []>; +
+ /* Create Node 3D array intrinsic - + * i8* llvm.hpvm.createNode3D(function*, i64, i64, i64); + */ + def int_hpvm_createNode3D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, + llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], + []>; + + /* Create dataflow edge intrinsic - + * i8* llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1); + */ + def int_hpvm_createEdge : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty, + llvm_i1_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i1_ty], + []>; + + /* Create bind input intrinsic - + * void llvm.hpvm.bind.input(i8*, i32, i32, i1); + */ + def int_hpvm_bind_input : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i1_ty], []>; + + /* Create bind output intrinsic - + * void llvm.hpvm.bind.output(i8*, i32, i32, i1); + */ + def int_hpvm_bind_output : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i1_ty], []>; + + /* Find associated dataflow node intrinsic - + * i8* llvm.hpvm.getNode(); + */ + def int_hpvm_getNode : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; + + /* Find parent dataflow node intrinsic - + * i8* llvm.hpvm.getParentNode(i8*); + */ + def int_hpvm_getParentNode : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], [IntrNoMem]>; + + /* Find the number of dimensions of a dataflow node intrinsic - + * i32 llvm.hpvm.getNumDims(i8*); + */ + def int_hpvm_getNumDims : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>; + + /* Find the unique identifier of a dataflow node (with respect to its parent + * node) in the specified dimension intrinsic - + */ + + /* i64 llvm.hpvm.getNodeInstanceID.[xyz](i8*); + */ + def int_hpvm_getNodeInstanceID_x : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], + [IntrNoMem]>; + + def int_hpvm_getNodeInstanceID_y : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], + [IntrNoMem]>; + + def int_hpvm_getNodeInstanceID_z : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], + [IntrNoMem]>; + + /* Find the number of instances of a dataflow node in the specified dimension + * intrinsic - + */ + + /* i64 llvm.hpvm.getNumNodeInstances.[xyz](i8*); + */ +
def int_hpvm_getNumNodeInstances_x : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], + [IntrNoMem]>; + + def int_hpvm_getNumNodeInstances_y : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], + [IntrNoMem]>; + + def int_hpvm_getNumNodeInstances_z : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], + [IntrNoMem]>; + + /* Local Barrier + * void llvm.hpvm.barrier(); + */ + def int_hpvm_barrier : Intrinsic<[], [], []>; + + /* Memory allocation inside the graph + * i8* llvm.hpvm.malloc(i64); + */ + def int_hpvm_malloc : Intrinsic<[llvm_ptr_ty], [llvm_i64_ty], []>; + + /* Find the vector length supported by target architecture + * intrinsic - + * i32 llvm.hpvm.getVectorLength(); + */ + def int_hpvm_getVectorLength : Intrinsic<[llvm_i32_ty], [], []>; + + /* ============ Atomic intrinsics ============= */ + // Atomic arithmetic operations + + /* i32 llvm.hpvm.atomic.add(i32*, i32)*/ + def int_hpvm_atomic_add: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + []>; + + /* i32 llvm.hpvm.atomic.sub(i32*, i32)*/ + def int_hpvm_atomic_sub: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + []>; + + /* i32 llvm.hpvm.atomic.xchg(i32*, i32)*/ + def int_hpvm_atomic_xchg: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + []>; + + /* i32 llvm.hpvm.atomic.min(i32*, i32)*/ + def int_hpvm_atomic_min: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + []>; + + /* i32 llvm.hpvm.atomic.max(i32*, i32)*/ + def int_hpvm_atomic_max: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + []>; + + // Atomic bitwise operations + + /* i32 llvm.hpvm.atomic.and(i32*, i32)*/ + def int_hpvm_atomic_and: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + []>; + + /* i32 llvm.hpvm.atomic.or(i32*, i32)*/ + def int_hpvm_atomic_or: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + []>; + + /* i32 llvm.hpvm.atomic.xor(i32*, i32)*/ + def int_hpvm_atomic_xor: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + []>; + +} diff --git a/hpvm/llvm_patches/include/IR/IntrinsicsVISC.td
b/hpvm/llvm_patches/include/IR/IntrinsicsVISC.td deleted file mode 100644 index d5330175d86c9576394c9363a4ba30fd651f19e8..0000000000000000000000000000000000000000 --- a/hpvm/llvm_patches/include/IR/IntrinsicsVISC.td +++ /dev/null @@ -1,208 +0,0 @@ -//===- IntrinsicsVISC.td - Defines VISC intrinsics ---------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines all of the VISC-specific intrinsics. -// -//===----------------------------------------------------------------------===// - -let TargetPrefix = "visc" in { - /* All intrinsics start with "llvm.visc." - * As we do not want the compiler to mess with these intrinsics, we assume - * worst memory behavior for all these intrinsics. - */ - - /* Initialization intrinsic - - * i8* llvm.visc.setup(function*); - */ - def int_visc_init : Intrinsic<[], [], []>; - - /* Launch intrinsic - with streaming argument - * i8* llvm.visc.launch(i8*, ArgList*, i1); - */ - def int_visc_launch : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, - llvm_ptr_ty, llvm_i1_ty], []>; - - /* Push intrinsic - push data on streaming pipeline - * void llvm.visc.push(i8*, ArgList*); - */ - def int_visc_push : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], []>; - - /* Pop intrinsic - pop data from streaming pipeline - * i8* llvm.visc.pop(i8*); - */ - def int_visc_pop : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; - - /* Cleanup intrinsic - - * void llvm.visc.cleanup(i8*); - */ - def int_visc_cleanup : Intrinsic<[], [], []>; - - /* Wait intrinsic - - * void llvm.visc.wait(graphID*); - */ - def int_visc_wait : Intrinsic<[], [llvm_ptr_ty], []>; - - /* Track memory intrinsic - - * void llvm.visc.trackMemory(i8*, i64); - */ - def int_visc_trackMemory : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>; - - /* Track memory intrinsic - - 
* void llvm.visc.untrackMemory(i8*); - */ - def int_visc_untrackMemory : Intrinsic<[], [llvm_ptr_ty], []>; - - /* Request memory intrinsic - - * void llvm.visc.requestMemory(i8*, i64); - */ - def int_visc_requestMemory : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>; - - /* Create Node intrinsic - - * i8* llvm.visc.createNode(function*); - */ - def int_visc_createNode : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; - - /* Create Node 1D array intrinsic - - * i8* llvm.visc.createNode1D(function*, i64); - */ - def int_visc_createNode1D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, - llvm_i64_ty], []>; - - /* Create Node 2D array intrinsic - - * i8* llvm.visc.createNode2D(function*, i64, i64); - */ - def int_visc_createNode2D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, - llvm_i64_ty, llvm_i64_ty], []>; - - /* Create Node 3D array intrinsic - - * i8* llvm.visc.createNode2D(function*, i64, i64, i64); - */ - def int_visc_createNode3D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, - llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], - []>; - - /* Create dataflow edge intrinsic - - * i8* llvm.visc.createEdge(i8*, i8*, i1, i32, i32, i1); - */ - def int_visc_createEdge : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty, - llvm_i1_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i1_ty], - []>; - - /* Create bind input intrinsic - - * void llvm.visc.bind.input(i8*, i32, i32); - */ - def int_visc_bind_input : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i1_ty], []>; - - /* Create bind output intrinsic - - * void llvm.visc.bind.output(i8*, i32, i32); - */ - def int_visc_bind_output : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i1_ty], []>; - - /* Find associated dataflow node intrinsic - - * i8* llvm.visc.getNode(); - */ - def int_visc_getNode : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; - - /* Find parent dataflow node intrinsic - - * i8* llvm.visc.getParentNode(i8*); - */ - def int_visc_getParentNode : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], [IntrNoMem]>; - - /* Find the number 
of dimensions of a dataflow node intrinsic - - * i32 llvm.visc.getNumDims(i8*); - */ - def int_visc_getNumDims : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>; - - /* Find the unique indentifier of a dataflow node (with respect to his parent - * node) in the specified dimension intrinsic - - */ - - /* i64 llvm.visc.getNodeInstanceID.[xyz](i8*); - */ - def int_visc_getNodeInstanceID_x : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], - [IntrNoMem]>; - - def int_visc_getNodeInstanceID_y : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], - [IntrNoMem]>; - - def int_visc_getNodeInstanceID_z : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], - [IntrNoMem]>; - - /* Find the number of instances of a dataflow node in the specified dimension - * intrinsic - - */ - - /* i64 llvm.visc.getNumNodeInstances.[xyz](i8*); - */ - def int_visc_getNumNodeInstances_x : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], - [IntrNoMem]>; - - def int_visc_getNumNodeInstances_y : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], - [IntrNoMem]>; - - def int_visc_getNumNodeInstances_z : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], - [IntrNoMem]>; - - /* Local Barrier - * void llvm.visc.barrier(); - */ - def int_visc_barrier : Intrinsic<[], [], []>; - - /* Memory allocation inside the graph - * i8* llvm.visc.malloc(); - */ - def int_visc_malloc : Intrinsic<[llvm_ptr_ty], [llvm_i64_ty], []>; - - /* Find the vector length supported by target architecture - * intrinsic - - * i32 llvm.visc.getVectorLength(); - */ - def int_visc_getVectorLength : Intrinsic<[llvm_i32_ty], [], []>; - - /* ============ Atomic intrinsics ============= */ - // Atomic arithmetic operations - - /* i32 llvm.visc.atomic.add(i32*, i32)*/ - def int_visc_atomic_add: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], - []>; - - /* i32 llvm.visc.atomic.sub(i32*, i32)*/ - def int_visc_atomic_sub: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], - []>; - - /* i32 llvm.visc.atomic.xchg(i32*, i32)*/ - def int_visc_atomic_xchg: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, 
llvm_i32_ty], - []>; - - /* i32 llvm.visc.atomic.min(i32*, i32)*/ - def int_visc_atomic_min: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], - []>; - - /* i32 llvm.visc.atomic.maxi32*, i32)*/ - def int_visc_atomic_max: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], - []>; - - // Atomic bitwise operations - - /* i32 llvm.visc.atomic.and(i32*, i32)*/ - def int_visc_atomic_and: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], - []>; - - /* i32 llvm.visc.atomic.or(i32*, i32)*/ - def int_visc_atomic_or: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], - []>; - - /* i32 llvm.visc.atomic.xor(i32*, i32)*/ - def int_visc_atomic_xor: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], - []>; - -} diff --git a/hpvm/llvm_patches/lib/AsmParser/LLLexer.cpp b/hpvm/llvm_patches/lib/AsmParser/LLLexer.cpp index a924405a2cac85ccd2e5e903a1ee1abb52774566..2c54392f8020ac7334117f1343214d085dbd6b84 100644 --- a/hpvm/llvm_patches/lib/AsmParser/LLLexer.cpp +++ b/hpvm/llvm_patches/lib/AsmParser/LLLexer.cpp @@ -855,7 +855,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(bit); KEYWORD(varFlags); - // VISC parameter attributes + // HPVM parameter attributes KEYWORD(in); KEYWORD(out); KEYWORD(inout); diff --git a/hpvm/llvm_patches/lib/AsmParser/LLParser.cpp b/hpvm/llvm_patches/lib/AsmParser/LLParser.cpp index f5ce44e2a920405f7e3790fcb1d9eb7fba28d636..7446ff1e32dd79a18fd678446af56e6d193468ad 100644 --- a/hpvm/llvm_patches/lib/AsmParser/LLParser.cpp +++ b/hpvm/llvm_patches/lib/AsmParser/LLParser.cpp @@ -1470,7 +1470,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, case lltok::kw_swiftself: case lltok::kw_immarg: - // VISC Parameter only attributes + // HPVM Parameter only attributes case lltok::kw_in: case lltok::kw_out: case lltok::kw_inout: @@ -1808,7 +1808,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { B.addAttribute(Attribute::ImmArg); break; - // VISC parameter attributes + // HPVM parameter attributes case lltok::kw_in: 
B.addAttribute(Attribute::In); break; @@ -1927,7 +1927,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_swiftself: case lltok::kw_immarg: - // VISC Parameter only attributes + // HPVM Parameter only attributes case lltok::kw_in: case lltok::kw_out: case lltok::kw_inout: diff --git a/hpvm/llvm_patches/lib/AsmParser/LLToken.h b/hpvm/llvm_patches/lib/AsmParser/LLToken.h index 7f9816965b2a21ae3d23873ca789a22481b575fa..cb0479b41c3b9e68d9697cd9d8adce4c80fa5c25 100644 --- a/hpvm/llvm_patches/lib/AsmParser/LLToken.h +++ b/hpvm/llvm_patches/lib/AsmParser/LLToken.h @@ -351,7 +351,7 @@ enum Kind { kw_insertvalue, kw_blockaddress, - // VISC parameter attributes + // HPVM parameter attributes kw_in, kw_out, kw_inout, diff --git a/hpvm/llvm_patches/lib/Bitcode/Reader/BitcodeReader.cpp b/hpvm/llvm_patches/lib/Bitcode/Reader/BitcodeReader.cpp index 7eb289d5872713ef826174b1e691c6440d4dd43e..a1e64472850911013250976312a8dd7d8b879c98 100644 --- a/hpvm/llvm_patches/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/hpvm/llvm_patches/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1395,7 +1395,7 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) { case Attribute::NoFree: return 1ULL << 63; - // VISC Attributes + // HPVM Attributes case Attribute::In: return 3ULL << 0; case Attribute::Out: diff --git a/hpvm/llvm_patches/lib/Bitcode/Writer/BitcodeWriter.cpp b/hpvm/llvm_patches/lib/Bitcode/Writer/BitcodeWriter.cpp index 55e7415efbea2b37d85f20b1d123ce9a80efe67e..fd671c397583fad6ec8a9998635705417f59eed1 100644 --- a/hpvm/llvm_patches/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/hpvm/llvm_patches/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -773,7 +773,7 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { case Attribute::SanitizeMemTag: return bitc::ATTR_KIND_SANITIZE_MEMTAG; - // VISC Attributes + // HPVM Attributes case Attribute::In: return bitc::ATTR_KIND_IN; case Attribute::Out: diff --git a/hpvm/llvm_patches/lib/IR/Attributes.cpp 
b/hpvm/llvm_patches/lib/IR/Attributes.cpp index 3cc95b3102fdf6c7062fffe1f9486cfa094bba9b..29c47a9e1107524278dcc57c188b320821ba7d86 100644 --- a/hpvm/llvm_patches/lib/IR/Attributes.cpp +++ b/hpvm/llvm_patches/lib/IR/Attributes.cpp @@ -404,7 +404,7 @@ std::string Attribute::getAsString(bool InAttrGrp) const { if (hasAttribute(Attribute::ImmArg)) return "immarg"; - // VISC attributes for arguments + // HPVM attributes for arguments if (hasAttribute(Attribute::In)) return "in"; if (hasAttribute(Attribute::Out)) diff --git a/hpvm/projects/hpvm-rt/CMakeLists.txt b/hpvm/projects/hpvm-rt/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..be7f69c4bfa7623c093bd5e913af1de3dbcf951c --- /dev/null +++ b/hpvm/projects/hpvm-rt/CMakeLists.txt @@ -0,0 +1,22 @@ +add_definitions(-DNUM_CORES=8) + +SET(CMAKE_C_COMPILER ${CMAKE_BINARY_DIR}/bin/clang) +SET(CMAKE_CXX_COMPILER ${CMAKE_BINARY_DIR}/bin/clang++) + +add_llvm_library(hpvm-rt.ll hpvm-rt.cpp + + DEPENDS + clang + llvm-dis + ) + + +target_compile_options(hpvm-rt.ll PUBLIC -flto ) +target_compile_options(hpvm-rt.ll PUBLIC -std=c++11) + +add_custom_target(hpvm-rt.cpp.o ALL + COMMAND ar -x ${CMAKE_BINARY_DIR}/lib/libhpvm-rt.ll.a + COMMAND mv ${CMAKE_BINARY_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.cpp.o ${CMAKE_BINARY_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc + COMMAND ${CMAKE_BINARY_DIR}/bin/llvm-dis ${CMAKE_BINARY_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc) + +add_dependencies(hpvm-rt.cpp.o hpvm-rt.ll) diff --git a/hpvm/projects/hpvm-rt/deviceStatusSwitchIntervals.txt b/hpvm/projects/hpvm-rt/deviceStatusSwitchIntervals.txt new file mode 100644 index 0000000000000000000000000000000000000000..7069470a1a6f8b1a49eea2824f27204ebdf3fb26 --- /dev/null +++ b/hpvm/projects/hpvm-rt/deviceStatusSwitchIntervals.txt @@ -0,0 +1,2 @@ +10 +10 15 10 16 15 30 15 25 20 15 diff --git a/hpvm/projects/hpvm-rt/device_abstraction.h b/hpvm/projects/hpvm-rt/device_abstraction.h new file mode 100644 index 
0000000000000000000000000000000000000000..4948502ce8ae47cbb7e37c1372fcd81813486e15 --- /dev/null +++ b/hpvm/projects/hpvm-rt/device_abstraction.h @@ -0,0 +1,80 @@ +#ifndef __DEVICE_ABSTRACTION__ +#define __DEVICE_ABSTRACTION__ + +#include <fstream> +#include <iostream> +#include <stdio.h> +#include <stdlib.h> +#include <thread> +#include <time.h> +#include <vector> + +#define MIN_INTERVAL 2 +#define MAX_INTERVAL 8 +#define NUM_INTERVALS 10 + +// Device status variable: true if the device is available for use +volatile bool deviceStatus = true; +// Intervals at which to change the device status +std::vector<unsigned> Intervals; + +// Set to true when program execution ends and so we can end the device +// simulation +volatile bool executionEnd = false; + +void initializeDeviceStatusIntervals() { + + unsigned sz = 0; + unsigned tmp = 0; + + const char *fn = "/home/kotsifa2/HPVM/hpvm/build/projects/hpvm-rt/" + "deviceStatusSwitchIntervals.txt"; + std::ifstream infile; + infile.open(fn); + if (!infile.is_open()) { + std::cout << "Failed to open " << fn << " for reading\n"; + return; + } + infile >> sz; + + if (sz) { + // We have data. Read them into the vector + for (unsigned i = 0; i < sz; i++) { + infile >> tmp; + Intervals.push_back(tmp); + } + infile.close(); + } else { + // We have no data. 
Create random data and write them into the file + infile.close(); + std::ofstream outfile; + outfile.open(fn); + if (!outfile.is_open()) { + std::cout << "Failed to open " << fn << " for writing\n"; + return; + } + sz = 1 + rand() % NUM_INTERVALS; + outfile << sz; + for (unsigned i = 0; i < sz; i++) { + Intervals.push_back(MIN_INTERVAL + + rand() % (MAX_INTERVAL - MIN_INTERVAL)); + outfile << Intervals[i]; + } + outfile.close(); + } + + return; +} + +void updateDeviceStatus() { + + unsigned i = 0; + while (!executionEnd) { + std::this_thread::sleep_for(std::chrono::seconds(Intervals[i])); + deviceStatus = !deviceStatus; + std::cout << "Changed device status to " << deviceStatus << "\n"; + i = (i + 1) % Intervals.size(); + } +} + +#endif // __DEVICE_ABSTRACTION__ diff --git a/hpvm/projects/visc-rt/visc-rt.cpp b/hpvm/projects/hpvm-rt/hpvm-rt.cpp similarity index 82% rename from hpvm/projects/visc-rt/visc-rt.cpp rename to hpvm/projects/hpvm-rt/hpvm-rt.cpp index df5b1b80f7ae71ca49f461a50f36f81064028ef9..cb3206ef500f7223a0463598f7b35d0b182f5f5f 100644 --- a/hpvm/projects/visc-rt/visc-rt.cpp +++ b/hpvm/projects/hpvm-rt/hpvm-rt.cpp @@ -13,7 +13,7 @@ #if _POSIX_VERSION >= 200112L #include <sys/time.h> #endif -#include "visc-rt.h" +#include "hpvm-rt.h" #ifndef DEBUG_BUILD #define DEBUG(s) \ @@ -58,7 +58,7 @@ vector<DFGDepth> DStack; pthread_mutex_t ocl_mtx; #define NUM_TESTS 1 -visc_TimerSet kernel_timer; +hpvm_TimerSet kernel_timer; static inline void checkErr(cl_int err, cl_int success, const char *name) { if (err != success) { @@ -70,7 +70,7 @@ static inline void checkErr(cl_int err, cl_int success, const char *name) { /************************* Depth Stack Routines ***************************/ -void llvm_visc_x86_dstack_push(unsigned n, uint64_t limitX, uint64_t iX, +void llvm_hpvm_x86_dstack_push(unsigned n, uint64_t limitX, uint64_t iX, uint64_t limitY, uint64_t iY, uint64_t limitZ, uint64_t iZ) { DEBUG(cout << "Pushing node information on stack:\n"); @@ -84,7 +84,7 
@@ void llvm_visc_x86_dstack_push(unsigned n, uint64_t limitX, uint64_t iX, pthread_mutex_unlock(&ocl_mtx); } -void llvm_visc_x86_dstack_pop() { +void llvm_hpvm_x86_dstack_pop() { DEBUG(cout << "Popping from depth stack\n"); pthread_mutex_lock(&ocl_mtx); DStack.pop_back(); @@ -92,7 +92,7 @@ void llvm_visc_x86_dstack_pop() { pthread_mutex_unlock(&ocl_mtx); } -uint64_t llvm_visc_x86_getDimLimit(unsigned level, unsigned dim) { +uint64_t llvm_hpvm_x86_getDimLimit(unsigned level, unsigned dim) { DEBUG(cout << "Request limit for dim " << dim << " of ancestor " << level << flush << "\n"); pthread_mutex_lock(&ocl_mtx); @@ -104,7 +104,7 @@ uint64_t llvm_visc_x86_getDimLimit(unsigned level, unsigned dim) { return result; } -uint64_t llvm_visc_x86_getDimInstance(unsigned level, unsigned dim) { +uint64_t llvm_hpvm_x86_getDimInstance(unsigned level, unsigned dim) { DEBUG(cout << "Request instance id for dim " << dim << " of ancestor " << level << flush << "\n"); pthread_mutex_lock(&ocl_mtx); @@ -118,7 +118,7 @@ uint64_t llvm_visc_x86_getDimInstance(unsigned level, unsigned dim) { /********************** Memory Tracking Routines **************************/ -void llvm_visc_track_mem(void *ptr, size_t size) { +void llvm_hpvm_track_mem(void *ptr, size_t size) { DEBUG(cout << "Start tracking memory: " << ptr << flush << "\n"); MemTrackerEntry *MTE = MTracker.lookup(ptr); if (MTE != NULL) { @@ -130,7 +130,7 @@ void llvm_visc_track_mem(void *ptr, size_t size) { DEBUG(MTracker.print()); } -void llvm_visc_untrack_mem(void *ptr) { +void llvm_hpvm_untrack_mem(void *ptr) { DEBUG(cout << "Stop tracking memory: " << ptr << flush << "\n"); MemTrackerEntry *MTE = MTracker.lookup(ptr); if (MTE == NULL) { @@ -145,7 +145,7 @@ void llvm_visc_untrack_mem(void *ptr) { DEBUG(MTracker.print()); } -static void *llvm_visc_ocl_request_mem(void *ptr, size_t size, +static void *llvm_hpvm_ocl_request_mem(void *ptr, size_t size, DFNodeContext_OCL *Context, bool isInput, bool isOutput) { 
pthread_mutex_lock(&ocl_mtx); @@ -183,7 +183,7 @@ static void *llvm_visc_ocl_request_mem(void *ptr, size_t size, else clFlags = CL_MEM_READ_ONLY; - visc_SwitchToTimer(&kernel_timer, visc_TimerID_COPY); + hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_COPY); // pthread_mutex_lock(&ocl_mtx); cl_mem d_input = clCreateBuffer(Context->clOCLContext, clFlags, size, NULL, &errcode); @@ -199,7 +199,7 @@ static void *llvm_visc_ocl_request_mem(void *ptr, size_t size, checkErr(errcode, CL_SUCCESS, "Failure to copy memory to device"); } - visc_SwitchToTimer(&kernel_timer, visc_TimerID_NONE); + hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_NONE); DEBUG(cout << " done\n"); MTE->update(MemTrackerEntry::DEVICE, (void *)d_input, Context); DEBUG(cout << "Updated Table\n"); @@ -208,11 +208,11 @@ static void *llvm_visc_ocl_request_mem(void *ptr, size_t size, return d_input; } -void *llvm_visc_x86_argument_ptr(void *ptr, size_t size) { - return llvm_visc_request_mem(ptr, size); +void *llvm_hpvm_x86_argument_ptr(void *ptr, size_t size) { + return llvm_hpvm_request_mem(ptr, size); } -void *llvm_visc_request_mem(void *ptr, size_t size) { +void *llvm_hpvm_request_mem(void *ptr, size_t size) { pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "[X86] Request memory: " << ptr << flush << "\n"); MemTrackerEntry *MTE = MTracker.lookup(ptr); @@ -233,13 +233,13 @@ void *llvm_visc_request_mem(void *ptr, size_t size) { DEBUG(cout << "\tMemory found on device at: " << MTE->getAddress() << flush << "\n"); DEBUG(cout << "\tCopying ..."); - visc_SwitchToTimer(&kernel_timer, visc_TimerID_COPY); + hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_COPY); // pthread_mutex_lock(&ocl_mtx); cl_int errcode = clEnqueueReadBuffer( ((DFNodeContext_OCL *)MTE->getContext())->clCommandQue, (cl_mem)MTE->getAddress(), CL_TRUE, 0, size, ptr, 0, NULL, NULL); // pthread_mutex_unlock(&ocl_mtx); - visc_SwitchToTimer(&kernel_timer, visc_TimerID_NONE); + hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_NONE); DEBUG(cout << " done\n"); 
checkErr(errcode, CL_SUCCESS, "[request mem] Failure to read output"); DEBUG(cout << "Free mem object on device\n"); @@ -253,25 +253,25 @@ void *llvm_visc_request_mem(void *ptr, size_t size) { /*************************** Timer Routines **********************************/ -static int is_async(enum visc_TimerID timer) { - return (timer == visc_TimerID_KERNEL) || (timer == visc_TimerID_COPY_ASYNC); +static int is_async(enum hpvm_TimerID timer) { + return (timer == hpvm_TimerID_KERNEL) || (timer == hpvm_TimerID_COPY_ASYNC); } -static int is_blocking(enum visc_TimerID timer) { - return (timer == visc_TimerID_COPY) || (timer == visc_TimerID_NONE); +static int is_blocking(enum hpvm_TimerID timer) { + return (timer == hpvm_TimerID_COPY) || (timer == hpvm_TimerID_NONE); } -#define INVALID_TIMERID visc_TimerID_LAST +#define INVALID_TIMERID hpvm_TimerID_LAST -static int asyncs_outstanding(struct visc_TimerSet *timers) { +static int asyncs_outstanding(struct hpvm_TimerSet *timers) { return (timers->async_markers != NULL) && (timers->async_markers->timerID != INVALID_TIMERID); } -static struct visc_async_time_marker_list * -get_last_async(struct visc_TimerSet *timers) { +static struct hpvm_async_time_marker_list * +get_last_async(struct hpvm_TimerSet *timers) { /* Find the last event recorded thus far */ - struct visc_async_time_marker_list *last_event = timers->async_markers; + struct hpvm_async_time_marker_list *last_event = timers->async_markers; if (last_event != NULL && last_event->timerID != INVALID_TIMERID) { while (last_event->next != NULL && last_event->next->timerID != INVALID_TIMERID) @@ -281,17 +281,17 @@ get_last_async(struct visc_TimerSet *timers) { return NULL; } -static void insert_marker(struct visc_TimerSet *tset, enum visc_TimerID timer) { +static void insert_marker(struct hpvm_TimerSet *tset, enum hpvm_TimerID timer) { cl_int ciErrNum = CL_SUCCESS; - struct visc_async_time_marker_list **new_event = &(tset->async_markers); + struct 
hpvm_async_time_marker_list **new_event = &(tset->async_markers); while (*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID) { new_event = &((*new_event)->next); } if (*new_event == NULL) { - *new_event = (struct visc_async_time_marker_list *)malloc( - sizeof(struct visc_async_time_marker_list)); + *new_event = (struct hpvm_async_time_marker_list *)malloc( + sizeof(struct hpvm_async_time_marker_list)); (*new_event)->marker = calloc(1, sizeof(cl_event)); /* // I don't think this is needed at all. I believe clEnqueueMarker 'creates' @@ -322,18 +322,18 @@ Event Status!\n"); } } -static void insert_submarker(struct visc_TimerSet *tset, char *label, - enum visc_TimerID timer) { +static void insert_submarker(struct hpvm_TimerSet *tset, char *label, + enum hpvm_TimerID timer) { cl_int ciErrNum = CL_SUCCESS; - struct visc_async_time_marker_list **new_event = &(tset->async_markers); + struct hpvm_async_time_marker_list **new_event = &(tset->async_markers); while (*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID) { new_event = &((*new_event)->next); } if (*new_event == NULL) { - *new_event = (struct visc_async_time_marker_list *)malloc( - sizeof(struct visc_async_time_marker_list)); + *new_event = (struct hpvm_async_time_marker_list *)malloc( + sizeof(struct hpvm_async_time_marker_list)); (*new_event)->marker = calloc(1, sizeof(cl_event)); /* #if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 ) @@ -364,10 +364,10 @@ Event Status!\n"); } /* Assumes that all recorded events have completed */ -static visc_Timestamp record_async_times(struct visc_TimerSet *tset) { - struct visc_async_time_marker_list *next_interval = NULL; - struct visc_async_time_marker_list *last_marker = get_last_async(tset); - visc_Timestamp total_async_time = 0; +static hpvm_Timestamp record_async_times(struct hpvm_TimerSet *tset) { + struct hpvm_async_time_marker_list *next_interval = NULL; + struct hpvm_async_time_marker_list *last_marker = get_last_async(tset); + hpvm_Timestamp 
total_async_time = 0; for (next_interval = tset->async_markers; next_interval != last_marker; next_interval = next_interval->next) { @@ -389,11 +389,11 @@ static visc_Timestamp record_async_times(struct visc_TimerSet *tset) { ciErrNum); } - visc_Timestamp interval = - (visc_Timestamp)(((double)(command_end - command_start))); + hpvm_Timestamp interval = + (hpvm_Timestamp)(((double)(command_end - command_start))); tset->timers[next_interval->timerID].elapsed += interval; if (next_interval->label != NULL) { - struct visc_SubTimer *subtimer = + struct hpvm_SubTimer *subtimer = tset->sub_timer_list[next_interval->timerID]->subtimer_list; while (subtimer != NULL) { if (strcmp(subtimer->label, next_interval->label) == 0) { @@ -413,8 +413,8 @@ static visc_Timestamp record_async_times(struct visc_TimerSet *tset) { return total_async_time; } -static void accumulate_time(visc_Timestamp *accum, visc_Timestamp start, - visc_Timestamp end) { +static void accumulate_time(hpvm_Timestamp *accum, hpvm_Timestamp start, + hpvm_Timestamp end) { #if _POSIX_VERSION >= 200112L *accum += end - start; #else @@ -423,33 +423,33 @@ static void accumulate_time(visc_Timestamp *accum, visc_Timestamp start, } #if _POSIX_VERSION >= 200112L -static visc_Timestamp get_time() { +static hpvm_Timestamp get_time() { struct timespec tv; clock_gettime(CLOCK_MONOTONIC, &tv); - return (visc_Timestamp)(tv.tv_sec * BILLION + tv.tv_nsec); + return (hpvm_Timestamp)(tv.tv_sec * BILLION + tv.tv_nsec); } #else #error "no supported time libraries are available on this platform" #endif -void visc_ResetTimer(struct visc_Timer *timer) { - timer->state = visc_Timer_STOPPED; +void hpvm_ResetTimer(struct hpvm_Timer *timer) { + timer->state = hpvm_Timer_STOPPED; #if _POSIX_VERSION >= 200112L timer->elapsed = 0; #else -#error "visc_ResetTimer: not implemented for this system" +#error "hpvm_ResetTimer: not implemented for this system" #endif } -void visc_StartTimer(struct visc_Timer *timer) { - if (timer->state != 
visc_Timer_STOPPED) { +void hpvm_StartTimer(struct hpvm_Timer *timer) { + if (timer->state != hpvm_Timer_STOPPED) { // FIXME: Removing warning statement to avoid printing this error // fputs("Ignoring attempt to start a running timer\n", stderr); return; } - timer->state = visc_Timer_RUNNING; + timer->state = hpvm_Timer_RUNNING; #if _POSIX_VERSION >= 200112L { @@ -458,19 +458,19 @@ void visc_StartTimer(struct visc_Timer *timer) { timer->init = tv.tv_sec * BILLION + tv.tv_nsec; } #else -#error "visc_StartTimer: not implemented for this system" +#error "hpvm_StartTimer: not implemented for this system" #endif } -void visc_StartTimerAndSubTimer(struct visc_Timer *timer, - struct visc_Timer *subtimer) { +void hpvm_StartTimerAndSubTimer(struct hpvm_Timer *timer, + struct hpvm_Timer *subtimer) { unsigned int numNotStopped = 0x3; // 11 - if (timer->state != visc_Timer_STOPPED) { + if (timer->state != hpvm_Timer_STOPPED) { fputs("Warning: Timer was not stopped\n", stderr); numNotStopped &= 0x1; // Zero out 2^1 } - if (subtimer->state != visc_Timer_STOPPED) { + if (subtimer->state != hpvm_Timer_STOPPED) { fputs("Warning: Subtimer was not stopped\n", stderr); numNotStopped &= 0x2; // Zero out 2^0 } @@ -479,8 +479,8 @@ void visc_StartTimerAndSubTimer(struct visc_Timer *timer, return; } - timer->state = visc_Timer_RUNNING; - subtimer->state = visc_Timer_RUNNING; + timer->state = hpvm_Timer_RUNNING; + subtimer->state = hpvm_Timer_RUNNING; #if _POSIX_VERSION >= 200112L { @@ -496,19 +496,19 @@ void visc_StartTimerAndSubTimer(struct visc_Timer *timer, } } #else -#error "visc_StartTimer: not implemented for this system" +#error "hpvm_StartTimer: not implemented for this system" #endif } -void visc_StopTimer(struct visc_Timer *timer) { - visc_Timestamp fini; +void hpvm_StopTimer(struct hpvm_Timer *timer) { + hpvm_Timestamp fini; - if (timer->state != visc_Timer_RUNNING) { + if (timer->state != hpvm_Timer_RUNNING) { // fputs("Ignoring attempt to stop a stopped timer\n", stderr); 
return; } - timer->state = visc_Timer_STOPPED; + timer->state = hpvm_Timer_STOPPED; #if _POSIX_VERSION >= 200112L { @@ -517,24 +517,24 @@ void visc_StopTimer(struct visc_Timer *timer) { fini = tv.tv_sec * BILLION + tv.tv_nsec; } #else -#error "visc_StopTimer: not implemented for this system" +#error "hpvm_StopTimer: not implemented for this system" #endif accumulate_time(&timer->elapsed, timer->init, fini); timer->init = fini; } -void visc_StopTimerAndSubTimer(struct visc_Timer *timer, - struct visc_Timer *subtimer) { +void hpvm_StopTimerAndSubTimer(struct hpvm_Timer *timer, + struct hpvm_Timer *subtimer) { - visc_Timestamp fini; + hpvm_Timestamp fini; unsigned int numNotRunning = 0x3; // 11 - if (timer->state != visc_Timer_RUNNING) { + if (timer->state != hpvm_Timer_RUNNING) { fputs("Warning: Timer was not running\n", stderr); numNotRunning &= 0x1; // Zero out 2^1 } - if (subtimer->state != visc_Timer_RUNNING) { + if (subtimer->state != hpvm_Timer_RUNNING) { fputs("Warning: Subtimer was not running\n", stderr); numNotRunning &= 0x2; // Zero out 2^0 } @@ -543,8 +543,8 @@ void visc_StopTimerAndSubTimer(struct visc_Timer *timer, return; } - timer->state = visc_Timer_STOPPED; - subtimer->state = visc_Timer_STOPPED; + timer->state = hpvm_Timer_STOPPED; + subtimer->state = hpvm_Timer_STOPPED; #if _POSIX_VERSION >= 200112L { @@ -553,7 +553,7 @@ void visc_StopTimerAndSubTimer(struct visc_Timer *timer, fini = tv.tv_sec * BILLION + tv.tv_nsec; } #else -#error "visc_StopTimer: not implemented for this system" +#error "hpvm_StopTimer: not implemented for this system" #endif if (numNotRunning & 0x2) { @@ -568,59 +568,59 @@ void visc_StopTimerAndSubTimer(struct visc_Timer *timer, } /* Get the elapsed time in seconds. 
*/ -double visc_GetElapsedTime(struct visc_Timer *timer) { +double hpvm_GetElapsedTime(struct hpvm_Timer *timer) { double ret; - if (timer->state != visc_Timer_STOPPED) { + if (timer->state != hpvm_Timer_STOPPED) { fputs("Elapsed time from a running timer is inaccurate\n", stderr); } #if _POSIX_VERSION >= 200112L ret = timer->elapsed / 1e9; #else -#error "visc_GetElapsedTime: not implemented for this system" +#error "hpvm_GetElapsedTime: not implemented for this system" #endif return ret; } -void visc_InitializeTimerSet(struct visc_TimerSet *timers) { +void hpvm_InitializeTimerSet(struct hpvm_TimerSet *timers) { int n; timers->wall_begin = get_time(); - timers->current = visc_TimerID_NONE; + timers->current = hpvm_TimerID_NONE; timers->async_markers = NULL; - for (n = 0; n < visc_TimerID_LAST; n++) { - visc_ResetTimer(&timers->timers[n]); + for (n = 0; n < hpvm_TimerID_LAST; n++) { + hpvm_ResetTimer(&timers->timers[n]); timers->sub_timer_list[n] = NULL; } } -void visc_AddSubTimer(struct visc_TimerSet *timers, char *label, - enum visc_TimerID visc_Category) { +void hpvm_AddSubTimer(struct hpvm_TimerSet *timers, char *label, + enum hpvm_TimerID hpvm_Category) { - struct visc_SubTimer *subtimer = - (struct visc_SubTimer *)malloc(sizeof(struct visc_SubTimer)); + struct hpvm_SubTimer *subtimer = + (struct hpvm_SubTimer *)malloc(sizeof(struct hpvm_SubTimer)); int len = strlen(label); subtimer->label = (char *)malloc(sizeof(char) * (len + 1)); sprintf(subtimer->label, "%s", label); - visc_ResetTimer(&subtimer->timer); + hpvm_ResetTimer(&subtimer->timer); subtimer->next = NULL; - struct visc_SubTimerList *subtimerlist = - timers->sub_timer_list[visc_Category]; + struct hpvm_SubTimerList *subtimerlist = + timers->sub_timer_list[hpvm_Category]; if (subtimerlist == NULL) { subtimerlist = - (struct visc_SubTimerList *)calloc(1, sizeof(struct visc_SubTimerList)); + (struct hpvm_SubTimerList *)calloc(1, sizeof(struct hpvm_SubTimerList)); subtimerlist->subtimer_list = subtimer; - 
timers->sub_timer_list[visc_Category] = subtimerlist; + timers->sub_timer_list[hpvm_Category] = subtimerlist; } else { // Append to list - struct visc_SubTimer *element = subtimerlist->subtimer_list; + struct hpvm_SubTimer *element = subtimerlist->subtimer_list; while (element->next != NULL) { element = element->next; } @@ -628,37 +628,37 @@ void visc_AddSubTimer(struct visc_TimerSet *timers, char *label, } } -void visc_SwitchToTimer(struct visc_TimerSet *timers, enum visc_TimerID timer) { +void hpvm_SwitchToTimer(struct hpvm_TimerSet *timers, enum hpvm_TimerID timer) { // cerr << "Switch to timer: " << timer << flush << "\n"; /* Stop the currently running timer */ - if (timers->current != visc_TimerID_NONE) { - struct visc_SubTimerList *subtimerlist = + if (timers->current != hpvm_TimerID_NONE) { + struct hpvm_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; - struct visc_SubTimer *currSubTimer = + struct hpvm_SubTimer *currSubTimer = (subtimerlist != NULL) ? subtimerlist->current : NULL; if (!is_async(timers->current)) { if (timers->current != timer) { if (currSubTimer != NULL) { - visc_StopTimerAndSubTimer(&timers->timers[timers->current], + hpvm_StopTimerAndSubTimer(&timers->timers[timers->current], &currSubTimer->timer); } else { - visc_StopTimer(&timers->timers[timers->current]); + hpvm_StopTimer(&timers->timers[timers->current]); } } else { if (currSubTimer != NULL) { - visc_StopTimer(&currSubTimer->timer); + hpvm_StopTimer(&currSubTimer->timer); } } } else { insert_marker(timers, timer); if (!is_async(timer)) { // if switching to async too, keep driver going - visc_StopTimer(&timers->timers[visc_TimerID_DRIVER]); + hpvm_StopTimer(&timers->timers[hpvm_TimerID_DRIVER]); } } } - visc_Timestamp currentTime = get_time(); + hpvm_Timestamp currentTime = get_time(); /* The only cases we check for asynchronous task completion is * when an overlapping CPU operation completes, or the next @@ -666,7 +666,7 @@ void visc_SwitchToTimer(struct 
visc_TimerSet *timers, enum visc_TimerID timer) { if (asyncs_outstanding(timers) && (!is_async(timers->current) || is_blocking(timer))) { - struct visc_async_time_marker_list *last_event = get_last_async(timers); + struct hpvm_async_time_marker_list *last_event = get_last_async(timers); /* CL_COMPLETE if completed */ cl_int ciErrNum = CL_SUCCESS; @@ -686,7 +686,7 @@ void visc_SwitchToTimer(struct visc_TimerSet *timers, enum visc_TimerID timer) { // timer to switch to is COPY or NONE if (async_done != CL_COMPLETE) { - accumulate_time(&(timers->timers[visc_TimerID_OVERLAP].elapsed), + accumulate_time(&(timers->timers[hpvm_TimerID_OVERLAP].elapsed), timers->async_begin, currentTime); } @@ -696,14 +696,14 @@ void visc_SwitchToTimer(struct visc_TimerSet *timers, enum visc_TimerID timer) { fprintf(stderr, "Error Waiting for Events!\n"); } - visc_Timestamp total_async_time = record_async_times(timers); + hpvm_Timestamp total_async_time = record_async_times(timers); /* Async operations completed before previous CPU operations: * overlapped time is the total async time */ if (async_done == CL_COMPLETE) { // fprintf(stderr, "Async_done: total_async_type = %lld\n", // total_async_time); - timers->timers[visc_TimerID_OVERLAP].elapsed += total_async_time; + timers->timers[hpvm_TimerID_OVERLAP].elapsed += total_async_time; } } else @@ -713,15 +713,15 @@ void visc_SwitchToTimer(struct visc_TimerSet *timers, enum visc_TimerID timer) { if (async_done == CL_COMPLETE) { /* Async operations completed before previous CPU operations: * overlapped time is the total async time */ - timers->timers[visc_TimerID_OVERLAP].elapsed += + timers->timers[hpvm_TimerID_OVERLAP].elapsed += record_async_times(timers); } } /* Start the new timer */ - if (timer != visc_TimerID_NONE) { + if (timer != hpvm_TimerID_NONE) { if (!is_async(timer)) { - visc_StartTimer(&timers->timers[timer]); + hpvm_StartTimer(&timers->timers[timer]); } else { // toSwitchTo Is Async (KERNEL/COPY_ASYNC) if 
(!asyncs_outstanding(timers)) { @@ -735,48 +735,48 @@ void visc_SwitchToTimer(struct visc_TimerSet *timers, enum visc_TimerID timer) { * so we can rename that marker as the beginning of this async * operation */ - struct visc_async_time_marker_list *last_event = get_last_async(timers); + struct hpvm_async_time_marker_list *last_event = get_last_async(timers); last_event->label = NULL; last_event->timerID = timer; } if (!is_async(timers->current)) { - visc_StartTimer(&timers->timers[visc_TimerID_DRIVER]); + hpvm_StartTimer(&timers->timers[hpvm_TimerID_DRIVER]); } } } timers->current = timer; } -void visc_SwitchToSubTimer(struct visc_TimerSet *timers, char *label, - enum visc_TimerID category) { - struct visc_SubTimerList *subtimerlist = +void hpvm_SwitchToSubTimer(struct hpvm_TimerSet *timers, char *label, + enum hpvm_TimerID category) { + struct hpvm_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; - struct visc_SubTimer *curr = + struct hpvm_SubTimer *curr = (subtimerlist != NULL) ? 
subtimerlist->current : NULL; - if (timers->current != visc_TimerID_NONE) { + if (timers->current != hpvm_TimerID_NONE) { if (!is_async(timers->current)) { if (timers->current != category) { if (curr != NULL) { - visc_StopTimerAndSubTimer(&timers->timers[timers->current], + hpvm_StopTimerAndSubTimer(&timers->timers[timers->current], &curr->timer); } else { - visc_StopTimer(&timers->timers[timers->current]); + hpvm_StopTimer(&timers->timers[timers->current]); } } else { if (curr != NULL) { - visc_StopTimer(&curr->timer); + hpvm_StopTimer(&curr->timer); } } } else { insert_submarker(timers, label, category); if (!is_async(category)) { // if switching to async too, keep driver going - visc_StopTimer(&timers->timers[visc_TimerID_DRIVER]); + hpvm_StopTimer(&timers->timers[hpvm_TimerID_DRIVER]); } } } - visc_Timestamp currentTime = get_time(); + hpvm_Timestamp currentTime = get_time(); /* The only cases we check for asynchronous task completion is * when an overlapping CPU operation completes, or the next @@ -784,7 +784,7 @@ void visc_SwitchToSubTimer(struct visc_TimerSet *timers, char *label, if (asyncs_outstanding(timers) && (!is_async(timers->current) || is_blocking(category))) { - struct visc_async_time_marker_list *last_event = get_last_async(timers); + struct hpvm_async_time_marker_list *last_event = get_last_async(timers); /* CL_COMPLETE if completed */ cl_int ciErrNum = CL_SUCCESS; @@ -808,7 +808,7 @@ void visc_SwitchToSubTimer(struct visc_TimerSet *timers, char *label, // because everything is being stopped to wait for synchronization it // seems that the extra sync wall time isn't being recorded anywhere if (async_done != CL_COMPLETE) - accumulate_time(&(timers->timers[visc_TimerID_OVERLAP].elapsed), + accumulate_time(&(timers->timers[hpvm_TimerID_OVERLAP].elapsed), timers->async_begin, currentTime); /* Wait on async operation completion */ @@ -816,7 +816,7 @@ void visc_SwitchToSubTimer(struct visc_TimerSet *timers, char *label, if (ciErrNum != CL_SUCCESS) { 
fprintf(stderr, "Error Waiting for Events!\n"); } - visc_Timestamp total_async_time = record_async_times(timers); + hpvm_Timestamp total_async_time = record_async_times(timers); /* Async operations completed before previous CPU operations: * overlapped time is the total async time */ @@ -824,7 +824,7 @@ void visc_SwitchToSubTimer(struct visc_TimerSet *timers, char *label, // into OVERLAP the immediately preceding EventSynchronize theoretically // didn't have any effect since it was already completed. if (async_done == CL_COMPLETE /*cudaSuccess*/) - timers->timers[visc_TimerID_OVERLAP].elapsed += total_async_time; + timers->timers[hpvm_TimerID_OVERLAP].elapsed += total_async_time; } else /* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */ @@ -833,14 +833,14 @@ void visc_SwitchToSubTimer(struct visc_TimerSet *timers, char *label, if (async_done == CL_COMPLETE /*cudaSuccess*/) { /* Async operations completed before previous CPU operations: * overlapped time is the total async time */ - timers->timers[visc_TimerID_OVERLAP].elapsed += + timers->timers[hpvm_TimerID_OVERLAP].elapsed += record_async_times(timers); } // else, this isn't blocking, so just check the next time around } subtimerlist = timers->sub_timer_list[category]; - struct visc_SubTimer *subtimer = NULL; + struct hpvm_SubTimer *subtimer = NULL; if (label != NULL) { subtimer = subtimerlist->subtimer_list; @@ -854,18 +854,18 @@ void visc_SwitchToSubTimer(struct visc_TimerSet *timers, char *label, } /* Start the new timer */ - if (category != visc_TimerID_NONE) { + if (category != hpvm_TimerID_NONE) { if (!is_async(category)) { if (subtimerlist != NULL) { subtimerlist->current = subtimer; } if (category != timers->current && subtimer != NULL) { - visc_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer); + hpvm_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer); } else if (subtimer != NULL) { - visc_StartTimer(&subtimer->timer); + 
hpvm_StartTimer(&subtimer->timer); } else { - visc_StartTimer(&timers->timers[category]); + hpvm_StartTimer(&timers->timers[category]); } } else { if (subtimerlist != NULL) { @@ -883,7 +883,7 @@ void visc_SwitchToSubTimer(struct visc_TimerSet *timers, char *label, * so we can rename that marker as the beginning of this async * operation */ - struct visc_async_time_marker_list *last_event = get_last_async(timers); + struct hpvm_async_time_marker_list *last_event = get_last_async(timers); last_event->timerID = category; last_event->label = label; } // else, marker for switchToThis was already inserted @@ -891,7 +891,7 @@ void visc_SwitchToSubTimer(struct visc_TimerSet *timers, char *label, // toSwitchto is already asynchronous, but if current/prev state is async // too, then DRIVER is already running if (!is_async(timers->current)) { - visc_StartTimer(&timers->timers[visc_TimerID_DRIVER]); + hpvm_StartTimer(&timers->timers[hpvm_TimerID_DRIVER]); } } } @@ -899,11 +899,11 @@ void visc_SwitchToSubTimer(struct visc_TimerSet *timers, char *label, timers->current = category; } -void visc_PrintTimerSet(struct visc_TimerSet *timers) { - visc_Timestamp wall_end = get_time(); +void hpvm_PrintTimerSet(struct hpvm_TimerSet *timers) { + hpvm_Timestamp wall_end = get_time(); - struct visc_Timer *t = timers->timers; - struct visc_SubTimer *sub = NULL; + struct hpvm_Timer *t = timers->timers; + struct hpvm_SubTimer *sub = NULL; int maxSubLength; @@ -920,13 +920,13 @@ void visc_PrintTimerSet(struct visc_TimerSet *timers) { const int maxCategoryLength = 20; int i; - for (i = 1; i < visc_TimerID_LAST; + for (i = 1; i < hpvm_TimerID_LAST; ++i) { // exclude NONE and OVRELAP from this format - if (visc_GetElapsedTime(&t[i]) != 0 || true) { + if (hpvm_GetElapsedTime(&t[i]) != 0 || true) { // Print Category Timer printf("%-*s: %.9f\n", maxCategoryLength, categories[i - 1], - visc_GetElapsedTime(&t[i])); + hpvm_GetElapsedTime(&t[i])); if (timers->sub_timer_list[i] != NULL) { sub = 
timers->sub_timer_list[i]->subtimer_list; @@ -949,24 +949,24 @@ void visc_PrintTimerSet(struct visc_TimerSet *timers) { // Print SubTimers while (sub != NULL) { printf(" -%-*s: %.9f\n", maxSubLength, sub->label, - visc_GetElapsedTime(&sub->timer)); + hpvm_GetElapsedTime(&sub->timer)); sub = sub->next; } } } } - if (visc_GetElapsedTime(&t[visc_TimerID_OVERLAP]) != 0) + if (hpvm_GetElapsedTime(&t[hpvm_TimerID_OVERLAP]) != 0) printf("CPU/Kernel Overlap: %.9f\n", - visc_GetElapsedTime(&t[visc_TimerID_OVERLAP])); + hpvm_GetElapsedTime(&t[hpvm_TimerID_OVERLAP])); float walltime = (wall_end - timers->wall_begin) / 1e9; printf("Timer Wall Time: %.9f\n", walltime); } -void visc_DestroyTimerSet(struct visc_TimerSet *timers) { +void hpvm_DestroyTimerSet(struct hpvm_TimerSet *timers) { /* clean up all of the async event markers */ - struct visc_async_time_marker_list *event = timers->async_markers; + struct hpvm_async_time_marker_list *event = timers->async_markers; while (event != NULL) { cl_int ciErrNum = CL_SUCCESS; @@ -981,7 +981,7 @@ void visc_DestroyTimerSet(struct visc_TimerSet *timers) { } free((event)->marker); - struct visc_async_time_marker_list *next = ((event)->next); + struct hpvm_async_time_marker_list *next = ((event)->next); free(event); @@ -990,10 +990,10 @@ void visc_DestroyTimerSet(struct visc_TimerSet *timers) { } int i = 0; - for (i = 0; i < visc_TimerID_LAST; ++i) { + for (i = 0; i < hpvm_TimerID_LAST; ++i) { if (timers->sub_timer_list[i] != NULL) { - struct visc_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list; - struct visc_SubTimer *prev = NULL; + struct hpvm_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list; + struct hpvm_SubTimer *prev = NULL; while (subtimer != NULL) { free(subtimer->label); prev = subtimer; @@ -1009,7 +1009,7 @@ void visc_DestroyTimerSet(struct visc_TimerSet *timers) { #define BUFFER_SIZE 1 // Launch API for a streaming dataflow graph -void *llvm_visc_streamLaunch(void (*LaunchFunc)(void *, void *), void 
*args) { +void *llvm_hpvm_streamLaunch(void (*LaunchFunc)(void *, void *), void *args) { DFNodeContext_X86 *Context = (DFNodeContext_X86 *)malloc(sizeof(DFNodeContext_X86)); @@ -1031,7 +1031,7 @@ void *llvm_visc_streamLaunch(void (*LaunchFunc)(void *, void *), void *args) { } // Push API for a streaming dataflow graph -void llvm_visc_streamPush(void *graphID, void *args) { +void llvm_hpvm_streamPush(void *graphID, void *args) { DEBUG(cout << "StreamPush -- Graph: " << graphID << ", Arguments: " << args << flush << "\n"); DFNodeContext_X86 *Ctx = (DFNodeContext_X86 *)graphID; @@ -1044,17 +1044,17 @@ void llvm_visc_streamPush(void *graphID, void *args) { if (Ctx->BindInSourcePort->at(j) == i) { // Push to all bind buffers connected to parent node at this port // DEBUG(cout << "\tPushing Value " << element << " to buffer\n"); - llvm_visc_bufferPush(Ctx->BindInputBuffers->at(j), element); + llvm_hpvm_bufferPush(Ctx->BindInputBuffers->at(j), element); } } } // Push 0 in isLastInput buffers of all child nodes for (CircularBuffer<uint64_t> *buffer : *(Ctx->isLastInputBuffers)) - llvm_visc_bufferPush(buffer, 0); + llvm_hpvm_bufferPush(buffer, 0); } // Pop API for a streaming dataflow graph -void *llvm_visc_streamPop(void *graphID) { +void *llvm_hpvm_streamPop(void *graphID) { DEBUG(cout << "StreamPop -- Graph: " << graphID << flush << "\n"); DFNodeContext_X86 *Ctx = (DFNodeContext_X86 *)graphID; unsigned totalBytes = 0; @@ -1063,7 +1063,7 @@ void *llvm_visc_streamPop(void *graphID) { void *output = malloc(totalBytes); unsigned offset = 0; for (unsigned i = 0; i < Ctx->BindOutputBuffers->size(); i++) { - uint64_t element = llvm_visc_bufferPop(Ctx->BindOutputBuffers->at(i)); + uint64_t element = llvm_hpvm_bufferPop(Ctx->BindOutputBuffers->at(i)); // DEBUG(cout << "\tPopped Value " << element << " from buffer\n"); memcpy((char *)output + offset, &element, Ctx->BindOutSizes->at(i)); offset += Ctx->BindOutSizes->at(i); @@ -1072,24 +1072,24 @@ void *llvm_visc_streamPop(void 
*graphID) { } // Wait API for a streaming dataflow graph -void llvm_visc_streamWait(void *graphID) { +void llvm_hpvm_streamWait(void *graphID) { DEBUG(cout << "StreamWait -- Graph: " << graphID << flush << "\n"); DFNodeContext_X86 *Ctx = (DFNodeContext_X86 *)graphID; // Push garbage to all other input buffers for (unsigned i = 0; i < Ctx->BindInputBuffers->size(); i++) { uint64_t element = 0; // DEBUG(cout << "\tPushing Value " << element << " to buffer\n"); - llvm_visc_bufferPush(Ctx->BindInputBuffers->at(i), element); + llvm_hpvm_bufferPush(Ctx->BindInputBuffers->at(i), element); } // Push 1 in isLastInput buffers of all child nodes for (unsigned i = 0; i < Ctx->isLastInputBuffers->size(); i++) - llvm_visc_bufferPush(Ctx->isLastInputBuffers->at(i), 1); + llvm_hpvm_bufferPush(Ctx->isLastInputBuffers->at(i), 1); - llvm_visc_freeThreads(graphID); + llvm_hpvm_freeThreads(graphID); } // Create a buffer and return the bufferID -void *llvm_visc_createBindInBuffer(void *graphID, uint64_t size, +void *llvm_hpvm_createBindInBuffer(void *graphID, uint64_t size, unsigned inArgPort) { DEBUG(cout << "Create BindInBuffer -- Graph: " << graphID << ", Size: " << size << flush << "\n"); @@ -1104,7 +1104,7 @@ void *llvm_visc_createBindInBuffer(void *graphID, uint64_t size, return bufferID; } -void *llvm_visc_createBindOutBuffer(void *graphID, uint64_t size) { +void *llvm_hpvm_createBindOutBuffer(void *graphID, uint64_t size) { DEBUG(cout << "Create BindOutBuffer -- Graph: " << graphID << ", Size: " << size << flush << "\n"); DFNodeContext_X86 *Context = (DFNodeContext_X86 *)graphID; @@ -1116,7 +1116,7 @@ void *llvm_visc_createBindOutBuffer(void *graphID, uint64_t size) { Context->BindOutSizes->push_back(size); return bufferID; } -void *llvm_visc_createEdgeBuffer(void *graphID, uint64_t size) { +void *llvm_hpvm_createEdgeBuffer(void *graphID, uint64_t size) { DEBUG(cout << "Create EdgeBuffer -- Graph: " << graphID << ", Size: " << size << flush << "\n"); DFNodeContext_X86 *Context = 
(DFNodeContext_X86 *)graphID; @@ -1129,7 +1129,7 @@ void *llvm_visc_createEdgeBuffer(void *graphID, uint64_t size) { return bufferID; } -void *llvm_visc_createLastInputBuffer(void *graphID, uint64_t size) { +void *llvm_hpvm_createLastInputBuffer(void *graphID, uint64_t size) { DEBUG(cout << "Create isLastInputBuffer -- Graph: " << graphID << ", Size: " << size << flush << "\n"); DFNodeContext_X86 *Context = (DFNodeContext_X86 *)graphID; @@ -1142,7 +1142,7 @@ void *llvm_visc_createLastInputBuffer(void *graphID, uint64_t size) { } // Free buffers -void llvm_visc_freeBuffers(void *graphID) { +void llvm_hpvm_freeBuffers(void *graphID) { DEBUG(cout << "Free all buffers -- Graph: " << graphID << flush << "\n"); DFNodeContext_X86 *Context = (DFNodeContext_X86 *)graphID; for (CircularBuffer<uint64_t> *bufferID : *(Context->BindInputBuffers)) @@ -1156,19 +1156,19 @@ void llvm_visc_freeBuffers(void *graphID) { } // Pop an element from the buffer -uint64_t llvm_visc_bufferPop(void *bufferID) { +uint64_t llvm_hpvm_bufferPop(void *bufferID) { CircularBuffer<uint64_t> *buffer = (CircularBuffer<uint64_t> *)bufferID; return buffer->pop(); } // Push an element into the buffer -void llvm_visc_bufferPush(void *bufferID, uint64_t element) { +void llvm_hpvm_bufferPush(void *bufferID, uint64_t element) { CircularBuffer<uint64_t> *buffer = (CircularBuffer<uint64_t> *)bufferID; buffer->push(element); } // Create a thread -void llvm_visc_createThread(void *graphID, void *(*Func)(void *), +void llvm_hpvm_createThread(void *graphID, void *(*Func)(void *), void *arguments) { DEBUG(cout << "Create Thread -- Graph: " << graphID << ", Func: " << Func << ", Args: " << arguments << flush << "\n"); @@ -1182,7 +1182,7 @@ void llvm_visc_createThread(void *graphID, void *(*Func)(void *), } // Wait for thread to finish -void llvm_visc_freeThreads(void *graphID) { +void llvm_hpvm_freeThreads(void *graphID) { DEBUG(cout << "Free Threads -- Graph: " << graphID << flush << "\n"); DFNodeContext_X86 *Ctx = 
(DFNodeContext_X86 *)graphID; for (pthread_t thread : *(Ctx->threads)) @@ -1191,7 +1191,7 @@ void llvm_visc_freeThreads(void *graphID) { /************************ OPENCL & PTHREAD API ********************************/ -void *llvm_visc_x86_launch(void *(*rootFunc)(void *), void *arguments) { +void *llvm_hpvm_x86_launch(void *(*rootFunc)(void *), void *arguments) { DFNodeContext_X86 *Context = (DFNodeContext_X86 *)malloc(sizeof(DFNodeContext_X86)); // int err; @@ -1202,7 +1202,7 @@ void *llvm_visc_x86_launch(void *(*rootFunc)(void *), void *arguments) { return Context; } -void llvm_visc_x86_wait(void *graphID) { +void llvm_hpvm_x86_wait(void *graphID) { DEBUG(cout << "Waiting for pthread to finish ...\n"); // DFNodeContext_X86* Context = (DFNodeContext_X86*) graphID; // pthread_join(Context->threadID, NULL); @@ -1210,9 +1210,9 @@ void llvm_visc_x86_wait(void *graphID) { DEBUG(cout << "\t... pthread Done!\n"); } -void *llvm_visc_ocl_initContext(enum visc::Target T) { +void *llvm_hpvm_ocl_initContext(enum hpvm::Target T) { pthread_mutex_lock(&ocl_mtx); - DEBUG(std::string Target = T == visc::GPU_TARGET ? "GPU" : "SPIR"); + DEBUG(std::string Target = T == hpvm::GPU_TARGET ? "GPU" : "SPIR"); DEBUG(cout << "Initializing Context for " << Target << " device\n"); cl_uint numPlatforms; cl_int errcode; @@ -1249,10 +1249,10 @@ void *llvm_visc_ocl_initContext(enum visc::Target T) { // assert(numPlatforms >= 2 && "Expecting two OpenCL platforms"); // Choose second one which is X86 AVX cl_context_properties properties[] = { - CL_CONTEXT_PLATFORM, (long)platforms[T == visc::GPU_TARGET ? 0 : 1], 0}; + CL_CONTEXT_PLATFORM, (long)platforms[T == hpvm::GPU_TARGET ? 0 : 1], 0}; globalOCLContext = clCreateContextFromType( properties, - T == visc::GPU_TARGET ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, NULL, + T == hpvm::GPU_TARGET ? 
CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, NULL, NULL, &errcode); // get the list of OCL devices associated with context size_t dataBytes; @@ -1264,7 +1264,7 @@ void *llvm_visc_ocl_initContext(enum visc::Target T) { errcode |= clGetContextInfo(globalOCLContext, CL_CONTEXT_DEVICES, dataBytes, clDevices, NULL); checkErr(errcode, CL_SUCCESS, "Failure to get context info"); - if (false && T == visc::SPIR_TARGET) { + if (false && T == hpvm::SPIR_TARGET) { cl_device_partition_property props[4]; props[0] = CL_DEVICE_PARTITION_BY_COUNTS; props[1] = NUM_CORES; @@ -1290,13 +1290,13 @@ void *llvm_visc_ocl_initContext(enum visc::Target T) { checkErr(errcode, CL_SUCCESS, "Failure to create OCL context"); DEBUG(cout << "Initialize Kernel Timer\n"); - visc_InitializeTimerSet(&kernel_timer); + hpvm_InitializeTimerSet(&kernel_timer); pthread_mutex_unlock(&ocl_mtx); return globalOCLContext; } -void llvm_visc_ocl_clearContext(void *graphID) { +void llvm_hpvm_ocl_clearContext(void *graphID) { pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Clear Context\n"); DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID; @@ -1309,12 +1309,12 @@ void llvm_visc_ocl_clearContext(void *graphID) { // DEBUG(cout << "Released context at: " << globalOCLContext); free(Context); DEBUG(cout << "Done with OCL kernel\n"); - cout << "Printing VISC Timer: KernelTimer\n"; - visc_PrintTimerSet(&kernel_timer); + cout << "Printing HPVM Timer: KernelTimer\n"; + hpvm_PrintTimerSet(&kernel_timer); pthread_mutex_unlock(&ocl_mtx); } -void llvm_visc_ocl_argument_shared(void *graphID, int arg_index, size_t size) { +void llvm_hpvm_ocl_argument_shared(void *graphID, int arg_index, size_t size) { pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Set Shared Memory Input:"); DEBUG(cout << "\tArgument Index = " << arg_index << ", Size = " << size @@ -1329,7 +1329,7 @@ void llvm_visc_ocl_argument_shared(void *graphID, int arg_index, size_t size) { pthread_mutex_unlock(&ocl_mtx); } -void llvm_visc_ocl_argument_scalar(void *graphID, 
void *input, int arg_index, +void llvm_hpvm_ocl_argument_scalar(void *graphID, void *input, int arg_index, size_t size) { pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Set Scalar Input:"); @@ -1345,7 +1345,7 @@ void llvm_visc_ocl_argument_scalar(void *graphID, void *input, int arg_index, pthread_mutex_unlock(&ocl_mtx); } -void *llvm_visc_ocl_argument_ptr(void *graphID, void *input, int arg_index, +void *llvm_hpvm_ocl_argument_ptr(void *graphID, void *input, int arg_index, size_t size, bool isInput, bool isOutput) { pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Set Pointer Input:"); @@ -1359,7 +1359,7 @@ void *llvm_visc_ocl_argument_ptr(void *graphID, void *input, int arg_index, pthread_mutex_unlock(&ocl_mtx); // Check with runtime the location of this memory - cl_mem d_input = (cl_mem)llvm_visc_ocl_request_mem(input, size, Context, + cl_mem d_input = (cl_mem)llvm_hpvm_ocl_request_mem(input, size, Context, isInput, isOutput); pthread_mutex_lock(&ocl_mtx); @@ -1374,7 +1374,7 @@ void *llvm_visc_ocl_argument_ptr(void *graphID, void *input, int arg_index, return d_input; } -void *llvm_visc_ocl_output_ptr(void *graphID, int arg_index, size_t size) { +void *llvm_hpvm_ocl_output_ptr(void *graphID, int arg_index, size_t size) { pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Set device memory for Output Struct:"); DEBUG(cout << "\tArgument Index = " << arg_index << ", Size = " << size @@ -1396,13 +1396,13 @@ void *llvm_visc_ocl_output_ptr(void *graphID, int arg_index, size_t size) { return d_output; } -void llvm_visc_ocl_free(void *ptr) { +void llvm_hpvm_ocl_free(void *ptr) { // DEBUG(cout << "Release Device Pointer: " << ptr << flush << "\n"); // cl_mem d_ptr = (cl_mem) ptr; // clReleaseMemObject(d_ptr); } -void *llvm_visc_ocl_getOutput(void *graphID, void *h_output, void *d_output, +void *llvm_hpvm_ocl_getOutput(void *graphID, void *h_output, void *d_output, size_t size) { pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Get Output:\n"); @@ -1421,7 +1421,7 @@ void 
*llvm_visc_ocl_getOutput(void *graphID, void *h_output, void *d_output, return h_output; } -void *llvm_visc_ocl_executeNode(void *graphID, unsigned workDim, +void *llvm_hpvm_ocl_executeNode(void *graphID, unsigned workDim, const size_t *localWorkSize, const size_t *globalWorkSize) { pthread_mutex_lock(&ocl_mtx); @@ -1467,7 +1467,7 @@ void *llvm_visc_ocl_executeNode(void *graphID, unsigned workDim, // pthread_mutex_lock(&ocl_mtx); clFinish(Context->clCommandQue); // pthread_mutex_unlock(&ocl_mtx); - visc_SwitchToTimer(&kernel_timer, visc_TimerID_COMPUTATION); + hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_COMPUTATION); // for(int i=0 ;i < NUM_TESTS; i++) { // cout << "Iteration = " << i << flush << "\n"; // pthread_mutex_lock(&ocl_mtx); @@ -1480,7 +1480,7 @@ void *llvm_visc_ocl_executeNode(void *graphID, unsigned workDim, // pthread_mutex_lock(&ocl_mtx); clFinish(Context->clCommandQue); // pthread_mutex_unlock(&ocl_mtx); - visc_SwitchToTimer(&kernel_timer, visc_TimerID_NONE); + hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_NONE); pthread_mutex_unlock(&ocl_mtx); return event; @@ -1529,7 +1529,7 @@ static char *LoadProgSource(const char *Filename, size_t *szFinalLength) { return cSourceString; } -void *llvm_visc_ocl_launch(const char *FileName, const char *KernelName) { +void *llvm_hpvm_ocl_launch(const char *FileName, const char *KernelName) { pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Launch OCL Kernel\n"); // Initialize OpenCL @@ -1599,7 +1599,7 @@ void *llvm_visc_ocl_launch(const char *FileName, const char *KernelName) { return Context; } -void llvm_visc_ocl_wait(void *graphID) { +void llvm_hpvm_ocl_wait(void *graphID) { pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Wait\n"); DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID; @@ -1609,27 +1609,27 @@ void llvm_visc_ocl_wait(void *graphID) { pthread_mutex_unlock(&ocl_mtx); } -void llvm_visc_switchToTimer(void **timerSet, enum visc_TimerID timer) { +void llvm_hpvm_switchToTimer(void **timerSet, enum 
hpvm_TimerID timer) { // cout << "Switching to timer " << timer << flush << "\n"; pthread_mutex_lock(&ocl_mtx); - // visc_SwitchToTimer((visc_TimerSet*)(*timerSet), timer); + // hpvm_SwitchToTimer((hpvm_TimerSet*)(*timerSet), timer); pthread_mutex_unlock(&ocl_mtx); } -void llvm_visc_printTimerSet(void **timerSet, char *timerName) { +void llvm_hpvm_printTimerSet(void **timerSet, char *timerName) { pthread_mutex_lock(&ocl_mtx); - cout << "Printing VISC Timer: "; + cout << "Printing HPVM Timer: "; if (timerName != NULL) cout << timerName << flush << "\n"; else cout << "Anonymous\n"; - visc_PrintTimerSet((visc_TimerSet *)(*timerSet)); + hpvm_PrintTimerSet((hpvm_TimerSet *)(*timerSet)); pthread_mutex_unlock(&ocl_mtx); } -void *llvm_visc_initializeTimerSet() { +void *llvm_hpvm_initializeTimerSet() { pthread_mutex_lock(&ocl_mtx); - visc_TimerSet *TS = (visc_TimerSet *)malloc(sizeof(visc_TimerSet)); - visc_InitializeTimerSet(TS); + hpvm_TimerSet *TS = (hpvm_TimerSet *)malloc(sizeof(hpvm_TimerSet)); + hpvm_InitializeTimerSet(TS); pthread_mutex_unlock(&ocl_mtx); return TS; } diff --git a/hpvm/projects/visc-rt/visc-rt.h b/hpvm/projects/hpvm-rt/hpvm-rt.h similarity index 74% rename from hpvm/projects/visc-rt/visc-rt.h rename to hpvm/projects/hpvm-rt/hpvm-rt.h index d9d946f1da14245f8cde426e7b5ea92f791537f5..519b467c9047fbbdeea3a4610bedda3a77c36fe2 100644 --- a/hpvm/projects/visc-rt/visc-rt.h +++ b/hpvm/projects/hpvm-rt/hpvm-rt.h @@ -2,8 +2,8 @@ * * (c) 2010 The Board of Trustees of the University of Illinois. 
*/ -#ifndef VISC_RT_HEADER -#define VISC_RT_HEADER +#ifndef HPVM_RT_HEADER +#define HPVM_RT_HEADER #include <ctime> #include <iostream> @@ -13,8 +13,8 @@ #include <vector> //#include <condition_variable> -#include "../../include/SupportVISC/VISCHint.h" -#include "../../include/SupportVISC/VISCTimer.h" +#include "../../include/SupportHPVM/HPVMHint.h" +#include "../../include/SupportHPVM/HPVMTimer.h" #ifndef DEBUG_BUILD #define DEBUG(s) \ @@ -64,12 +64,12 @@ public: unsigned getNumDim() const { return numDim; } }; -void llvm_visc_x86_dstack_push(unsigned n, uint64_t limitX = 0, uint64_t iX = 0, +void llvm_hpvm_x86_dstack_push(unsigned n, uint64_t limitX = 0, uint64_t iX = 0, uint64_t limitY = 0, uint64_t iY = 0, uint64_t limitZ = 0, uint64_t iZ = 0); -void llvm_visc_x86_dstack_pop(); -uint64_t llvm_visc_x86_getDimLimit(unsigned level, unsigned dim); -uint64_t llvm_visc_x86_getDimInstance(unsigned level, unsigned dim); +void llvm_hpvm_x86_dstack_pop(); +uint64_t llvm_hpvm_x86_getDimLimit(unsigned level, unsigned dim); +uint64_t llvm_hpvm_x86_getDimInstance(unsigned level, unsigned dim); /********************* Memory Tracker **********************************/ class MemTrackerEntry { @@ -143,32 +143,32 @@ public: } }; -void llvm_visc_track_mem(void *, size_t); -void llvm_visc_untrack_mem(void *); -void *llvm_visc_request_mem(void *, size_t); +void llvm_hpvm_track_mem(void *, size_t); +void llvm_hpvm_untrack_mem(void *); +void *llvm_hpvm_request_mem(void *, size_t); /*********************** OPENCL & PTHREAD API **************************/ -void *llvm_visc_x86_launch(void *(void *), void *); -void llvm_visc_x86_wait(void *); -void *llvm_visc_ocl_initContext(enum visc::Target); - -void *llvm_visc_x86_argument_ptr(void *, size_t); - -void llvm_visc_ocl_clearContext(void *); -void llvm_visc_ocl_argument_shared(void *, int, size_t); -void llvm_visc_ocl_argument_scalar(void *, void *, int, size_t); -void *llvm_visc_ocl_argument_ptr(void *, void *, int, size_t, bool, bool); 
-void *llvm_visc_ocl_output_ptr(void *, int, size_t); -void llvm_visc_ocl_free(void *); -void *llvm_visc_ocl_getOutput(void *, void *, void *, size_t); -void *llvm_visc_ocl_executeNode(void *, unsigned, const size_t *, +void *llvm_hpvm_x86_launch(void *(void *), void *); +void llvm_hpvm_x86_wait(void *); +void *llvm_hpvm_ocl_initContext(enum hpvm::Target); + +void *llvm_hpvm_x86_argument_ptr(void *, size_t); + +void llvm_hpvm_ocl_clearContext(void *); +void llvm_hpvm_ocl_argument_shared(void *, int, size_t); +void llvm_hpvm_ocl_argument_scalar(void *, void *, int, size_t); +void *llvm_hpvm_ocl_argument_ptr(void *, void *, int, size_t, bool, bool); +void *llvm_hpvm_ocl_output_ptr(void *, int, size_t); +void llvm_hpvm_ocl_free(void *); +void *llvm_hpvm_ocl_getOutput(void *, void *, void *, size_t); +void *llvm_hpvm_ocl_executeNode(void *, unsigned, const size_t *, const size_t *); -void *llvm_visc_ocl_launch(const char *, const char *); -void llvm_visc_ocl_wait(void *); +void *llvm_hpvm_ocl_launch(const char *, const char *); +void llvm_hpvm_ocl_wait(void *); -void llvm_visc_switchToTimer(void **timerSet, enum visc_TimerID); -void llvm_visc_printTimerSet(void **timerSet, char *timerName = NULL); -void *llvm_visc_initializeTimerSet(); +void llvm_hpvm_switchToTimer(void **timerSet, enum hpvm_TimerID); +void llvm_hpvm_printTimerSet(void **timerSet, char *timerName = NULL); +void *llvm_hpvm_initializeTimerSet(); } /*************************** Pipeline API ******************************/ @@ -249,30 +249,30 @@ template <class ElementType> ElementType CircularBuffer<ElementType>::pop() { extern "C" { // Functions to push and pop values from pipeline buffers -uint64_t llvm_visc_bufferPop(void *); -void llvm_visc_bufferPush(void *, uint64_t); +uint64_t llvm_hpvm_bufferPop(void *); +void llvm_hpvm_bufferPush(void *, uint64_t); // Functions to create and destroy buffers -void *llvm_visc_createBindInBuffer(void *, uint64_t, unsigned); -void *llvm_visc_createBindOutBuffer(void *, 
uint64_t); -void *llvm_visc_createEdgeBuffer(void *, uint64_t); -void *llvm_visc_createLastInputBuffer(void *, uint64_t); +void *llvm_hpvm_createBindInBuffer(void *, uint64_t, unsigned); +void *llvm_hpvm_createBindOutBuffer(void *, uint64_t); +void *llvm_hpvm_createEdgeBuffer(void *, uint64_t); +void *llvm_hpvm_createLastInputBuffer(void *, uint64_t); -void llvm_visc_freeBuffers(void *); +void llvm_hpvm_freeBuffers(void *); // Functions to create and destroy threads -void llvm_visc_createThread(void *graphID, void *(*Func)(void *), void *); -void llvm_visc_freeThreads(void *); +void llvm_hpvm_createThread(void *graphID, void *(*Func)(void *), void *); +void llvm_hpvm_freeThreads(void *); // Launch API for a streaming graph. // Arguments: // (1) Launch Function: void* (void*, void*) // (2) Push Function: void (void*, std::vector<uint64_t>**, unsgined) // (3) Pop Function: void* (std::vector<uint64_t>**, unsigned) -void *llvm_visc_streamLaunch(void (*LaunchFunc)(void *, void *), void *); -void llvm_visc_streamPush(void *graphID, void *args); -void *llvm_visc_streamPop(void *graphID); -void llvm_visc_streamWait(void *graphID); +void *llvm_hpvm_streamLaunch(void (*LaunchFunc)(void *, void *), void *); +void llvm_hpvm_streamPush(void *graphID, void *args); +void *llvm_hpvm_streamPop(void *graphID); +void llvm_hpvm_streamWait(void *graphID); } -#endif // VISC_RT_HEADER +#endif // HPVM_RT_HEADER diff --git a/hpvm/projects/hpvm-rt/makefile b/hpvm/projects/hpvm-rt/makefile new file mode 100644 index 0000000000000000000000000000000000000000..927e26e254a2b2f980fed8efd8858935e9f3cbdf --- /dev/null +++ b/hpvm/projects/hpvm-rt/makefile @@ -0,0 +1,29 @@ +#LLVM_SRC_ROOT = +LLVM_BUILD_ROOT = ${LLVM_SRC_ROOT}/../build/ + +CUDA_INC_PATH = /software/cuda-9.1/include/CL/ + + +ifeq ($(NUM_CORES),) + NUM_CORES=1 +endif + +CPP_FLAGS = -I$(LLVM_SRC_ROOT)/include -I$(LLVM_BUILD_ROOT)/include -I$(CUDA_INC_PATH) -std=c++11 -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS +TARGET:=hpvm-rt + 
+LLVM_CC:=$(LLVM_BUILD_ROOT)/bin/clang +LLVM_CXX:=$(LLVM_BUILD_ROOT)/bin/clang++ + +OPTS = + +ifeq ($(DEBUG),1) + OPTS+=-DDEBUG_BUILD +endif + +all: $(TARGET:%=%.ll) + +$(TARGET:%=%.ll):%.ll:%.cpp %.h + $(LLVM_CXX) -DNUM_CORES=$(NUM_CORES) -O3 -S -emit-llvm $(CPP_FLAGS) $(OPTS) $< -o $@ + +clean : + rm -f $(TARGET).ll diff --git a/hpvm/projects/hpvm-rt/policy.h b/hpvm/projects/hpvm-rt/policy.h new file mode 100644 index 0000000000000000000000000000000000000000..d50e65868b376bfbcc3d4bd00d4919db677722b8 --- /dev/null +++ b/hpvm/projects/hpvm-rt/policy.h @@ -0,0 +1,108 @@ +#ifndef __POLICY__ +#define __POLICY__ + +#include "device_abstraction.h" +#include <string> + +/************************* Policies *************************************/ +class Policy { +public: + virtual int getVersion(const char *, int64_t) = 0; + virtual ~Policy(){}; +}; + +class ConstPolicy : public Policy { +public: + ConstPolicy(int deviceID) : deviceID(deviceID) {} + + int getVersion(const char *, int64_t) override { return deviceID; } + +private: + int deviceID; +}; + +class NodePolicy : public Policy { + virtual int getVersion(const char *name, int64_t it) override { + std::string s(name); + // std::string NodeNames[1] = { + // "_Z9mysgemmNTPfiS_iS_iiff_clonedInternal_level2_cloned" }; + std::string NodeNames[] = { + "WrapperGaussianSmoothing_cloned", + "WrapperlaplacianEstimate_cloned", + "WrapperComputeZeroCrossings_cloned", + "WrapperComputeGradient_cloned", + "WrapperComputeMaxGradient_cloned", + "WrapperRejectZeroCrossings_cloned", + }; + // if (!s.compare(NodeNames[4])) { + // std::cout << s << ": CPU" << "\n"; + // return 0; + //} + return 2; + } +}; + +class IterationPolicy : public Policy { + virtual int getVersion(const char *name, int64_t it) override { + if ((it % 10 == 0) || (it % 10 == 1)) + return 0; + else + return 2; + } +}; + +class DeviceStatusPolicy : public Policy { + virtual int getVersion(const char *name, int64_t it) override { + if (deviceStatus) { + // std::cout 
<< "Returning GPU\n"; + return 2; + } else { + // std::cout << "Returning CPU\n"; + return 0; + } + } +}; + +/* ------------------------------------------------------------------------- */ +// Added for the CFAR interactive policy demo. + +class InteractivePolicy : public Policy { +private: + // 0 :for CPU, 1 for GPU, 2 for Vector + unsigned int userTargetDeviceChoice; + // Used to end thread execution + bool end; + // Thread that will update userTargetDeviceChoice + std::thread userTargetDeviceChoiceThread; + // Thread function + void updateUserTargetChoice() { + while (!end) { + std::cout << "Select target device (0 for CPU, 1 fpr GPU): "; + std::cin >> userTargetDeviceChoice; + if (userTargetDeviceChoice > 1) { + std::cout << "Invalid target device. Selecting GPU instead.\n"; + userTargetDeviceChoice = 1; + } + } + } + +public: + // Inherited method, erquired for every policy object + virtual int getVersion(const char *name, int64_t it) { + return userTargetDeviceChoice; + } + + InteractivePolicy() { + userTargetDeviceChoice = 1; + end = false; + userTargetDeviceChoiceThread = + std::thread(&InteractivePolicy::updateUserTargetChoice, this); + } + + ~InteractivePolicy() { + end = true; + userTargetDeviceChoiceThread.join(); + } +}; + +#endif // __POLICY__ diff --git a/hpvm/projects/llvm-cbe/lib/Target/CBackend/CBackend.cpp b/hpvm/projects/llvm-cbe/lib/Target/CBackend/CBackend.cpp index c61573a13abcc09b1db10d86e141264a8b1c1760..50a7e6848350ef99c96f56cb5ac6d2d75308f398 100644 --- a/hpvm/projects/llvm-cbe/lib/Target/CBackend/CBackend.cpp +++ b/hpvm/projects/llvm-cbe/lib/Target/CBackend/CBackend.cpp @@ -14,14 +14,14 @@ #include "CBackend.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/Config/config.h" #include "llvm/IR/InstIterator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Host.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" 
-#include "llvm/Support/Host.h" -#include "llvm/CodeGen/TargetLowering.h" -#include "llvm/Config/config.h" #include "llvm/Transforms/Utils.h" #include <algorithm> @@ -29,14 +29,13 @@ #include <iostream> - //#include "PHINodePass.h" -//Jackson Korba 9/29/14 +// Jackson Korba 9/29/14 #ifndef DEBUG_TYPE #define DEBUG_TYPE "" #endif -//End Modification +// End Modification #define DEBUG(x) x // Some ms header decided to define setjmp as _setjmp, undo this for this file @@ -53,7 +52,8 @@ extern "C" void LLVMInitializeCBackendTarget() { char CWriter::ID = 0; -// extra (invalid) Ops tags for tracking unary ops as a special case of the available binary ops +// extra (invalid) Ops tags for tracking unary ops as a special case of the +// available binary ops enum UnaryOps { BinaryNeg = Instruction::OtherOpsEnd + 1, BinaryNot, @@ -62,19 +62,16 @@ enum UnaryOps { static bool isEmptyType(Type *Ty) { if (StructType *STy = dyn_cast<StructType>(Ty)) return STy->getNumElements() == 0 || - std::all_of(STy->element_begin(), STy->element_end(), [](Type *T){ return isEmptyType(T); }); + std::all_of(STy->element_begin(), STy->element_end(), + [](Type *T) { return isEmptyType(T); }); if (VectorType *VTy = dyn_cast<VectorType>(Ty)) - return VTy->getNumElements() == 0 || - isEmptyType(VTy->getElementType()); + return VTy->getNumElements() == 0 || isEmptyType(VTy->getElementType()); if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) - return ATy->getNumElements() == 0 || - isEmptyType(ATy->getElementType()); + return ATy->getNumElements() == 0 || isEmptyType(ATy->getElementType()); return Ty->isVoidTy(); } -bool CWriter::isEmptyType(Type *Ty) const { - return ::isEmptyType(Ty); -} +bool CWriter::isEmptyType(Type *Ty) const { return ::isEmptyType(Ty); } /// isAddressExposed - Return true if the specified value's name needs to /// have its address taken in order to get a C value of the correct type. 
@@ -108,10 +105,9 @@ bool CWriter::isInlinableInst(Instruction &I) const { } // Must be an expression, must be used exactly once. If it is dead, we // emit it inline where it would go. - if (isEmptyType(I.getType()) || !I.hasOneUse() || - I.isTerminator() || isa<CallInst>(I) || isa<PHINode>(I) || - isa<LoadInst>(I) || isa<VAArgInst>(I) || isa<InsertElementInst>(I) || - isa<InsertValueInst>(I)) + if (isEmptyType(I.getType()) || !I.hasOneUse() || I.isTerminator() || + isa<CallInst>(I) || isa<PHINode>(I) || isa<LoadInst>(I) || + isa<VAArgInst>(I) || isa<InsertElementInst>(I) || isa<InsertValueInst>(I)) // Don't inline a load across a store or other bad things! return false; @@ -133,17 +129,18 @@ bool CWriter::isInlinableInst(Instruction &I) const { AllocaInst *CWriter::isDirectAlloca(Value *V) const { DEBUG(errs() << "Checking if " << *V << " is a direct alloca!\n"); AllocaInst *AI = dyn_cast<AllocaInst>(V); - if (!AI) return 0; + if (!AI) + return 0; // Modification to inline fixed size array alloca! if (AI->isArrayAllocation()) - return AI; // FIXME: we can also inline fixed size array allocas! + return AI; // FIXME: we can also inline fixed size array allocas! if (AI->getParent() != &AI->getParent()->getParent()->getEntryBlock()) return 0; return AI; } // isInlineAsm - Check if the instruction is a call to an inline asm chunk. 
-bool CWriter::isInlineAsm(Instruction& I) const { +bool CWriter::isInlineAsm(Instruction &I) const { if (CallInst *CI = dyn_cast<CallInst>(&I)) return isa<InlineAsm>(CI->getCalledValue()); return false; @@ -161,18 +158,19 @@ bool CWriter::runOnFunction(Function &F) { PDT = &getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); // Adding Scalar Evolution Pass for loop induction variable SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - //Adding Dominator Tree Pass + // Adding Dominator Tree Pass DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); // Adding Assumption Cache AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); // Adding IVUsers Pass for loop recongnition // IU = &getAnalysis<IVUsersWrapperPass>().getIU(); - BasicBlock* entry = &(F.getEntryBlock()); - for (df_iterator<BasicBlock*> BI = df_begin(entry), BE = df_end(entry); BI!=BE; ++BI) { + BasicBlock *entry = &(F.getEntryBlock()); + for (df_iterator<BasicBlock *> BI = df_begin(entry), BE = df_end(entry); + BI != BE; ++BI) { BasicBlock *BB = *BI; if (Loop *L = LI->getLoopFor(&*BB)) { - if(simplifyLoop(L, DT, LI, SE, AC, nullptr, /*true*/false)) { + if (simplifyLoop(L, DT, LI, SE, AC, nullptr, /*true*/ false)) { DEBUG(errs() << "Simplified loop!\n" << *L << "\n"); } } @@ -180,7 +178,6 @@ bool CWriter::runOnFunction(Function &F) { // Get rid of intrinsics we can't handle. 
lowerIntrinsics(F); - printFunction(F); LI = NULL; @@ -196,15 +193,15 @@ static std::string CBEMangle(const std::string &S) { Result += S[i]; } else { Result += '_'; - Result += 'A'+(S[i]&15); - Result += 'A'+((S[i]>>4)&15); + Result += 'A' + (S[i] & 15); + Result += 'A' + ((S[i] >> 4) & 15); Result += '_'; } return Result; } -raw_ostream & -CWriter::printTypeString(raw_ostream &Out, Type *Ty, bool isSigned) { +raw_ostream &CWriter::printTypeString(raw_ostream &Out, Type *Ty, + bool isSigned) { if (StructType *ST = dyn_cast<StructType>(Ty)) { assert(!isEmptyType(ST)); TypedefDeclTypes.insert(Ty); @@ -224,46 +221,51 @@ CWriter::printTypeString(raw_ostream &Out, Type *Ty, bool isSigned) { } switch (Ty->getTypeID()) { - case Type::VoidTyID: return Out << "void"; - case Type::IntegerTyID: { - unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); - if (NumBits == 1) - return Out << "bool"; - else { - assert(NumBits <= 128 && "Bit widths > 128 not implemented yet"); - return Out << (isSigned?"i":"u") << NumBits; - } - } - case Type::FloatTyID: return Out << "f32"; - case Type::DoubleTyID: return Out << "f64"; - case Type::X86_FP80TyID: return Out << "f80"; - case Type::PPC_FP128TyID: - case Type::FP128TyID: return Out << "f128"; - - case Type::X86_MMXTyID: - return Out << (isSigned ? 
"i32y2" : "u32y2"); - - case Type::VectorTyID: { - TypedefDeclTypes.insert(Ty); - VectorType *VTy = cast<VectorType>(Ty); - assert(VTy->getNumElements() != 0); - printTypeString(Out, VTy->getElementType(), isSigned); - return Out << "x" << VTy->getNumElements(); - } - - case Type::ArrayTyID: { - TypedefDeclTypes.insert(Ty); - ArrayType *ATy = cast<ArrayType>(Ty); - assert(ATy->getNumElements() != 0); - printTypeString(Out, ATy->getElementType(), isSigned); - return Out << "a" << ATy->getNumElements(); - } + case Type::VoidTyID: + return Out << "void"; + case Type::IntegerTyID: { + unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); + if (NumBits == 1) + return Out << "bool"; + else { + assert(NumBits <= 128 && "Bit widths > 128 not implemented yet"); + return Out << (isSigned ? "i" : "u") << NumBits; + } + } + case Type::FloatTyID: + return Out << "f32"; + case Type::DoubleTyID: + return Out << "f64"; + case Type::X86_FP80TyID: + return Out << "f80"; + case Type::PPC_FP128TyID: + case Type::FP128TyID: + return Out << "f128"; + + case Type::X86_MMXTyID: + return Out << (isSigned ? 
"i32y2" : "u32y2"); + + case Type::VectorTyID: { + TypedefDeclTypes.insert(Ty); + VectorType *VTy = cast<VectorType>(Ty); + assert(VTy->getNumElements() != 0); + printTypeString(Out, VTy->getElementType(), isSigned); + return Out << "x" << VTy->getNumElements(); + } - default: + case Type::ArrayTyID: { + TypedefDeclTypes.insert(Ty); + ArrayType *ATy = cast<ArrayType>(Ty); + assert(ATy->getNumElements() != 0); + printTypeString(Out, ATy->getElementType(), isSigned); + return Out << "a" << ATy->getNumElements(); + } + + default: #ifndef NDEBUG - errs() << "Unknown primitive type: " << *Ty << "\n"; + errs() << "Unknown primitive type: " << *Ty << "\n"; #endif - llvm_unreachable(0); + llvm_unreachable(0); } } @@ -278,8 +280,9 @@ std::string CWriter::getStructName(StructType *ST) { return "struct l_unnamed_" + utostr(id); } -std::string CWriter::getFunctionName(FunctionType *FT, - std::pair<AttributeList, CallingConv::ID> PAL) { +std::string +CWriter::getFunctionName(FunctionType *FT, + std::pair<AttributeList, CallingConv::ID> PAL) { unsigned &id = UnnamedFunctionIDs[std::make_pair(FT, PAL)]; if (id == 0) id = ++NextFunctionNumber; @@ -293,7 +296,8 @@ std::string CWriter::getArrayName(ArrayType *AT) { // value semantics (avoiding the array "decay"). 
assert(!isEmptyType(AT)); printTypeName(ArrayInnards, AT->getElementType(), false); - return "struct l_array_" + utostr(AT->getNumElements()) + '_' + CBEMangle(ArrayInnards.str()); + return "struct l_array_" + utostr(AT->getNumElements()) + '_' + + CBEMangle(ArrayInnards.str()); } std::string CWriter::getVectorName(VectorType *VT, bool Aligned) { @@ -304,95 +308,125 @@ std::string CWriter::getVectorName(VectorType *VT, bool Aligned) { // if (Aligned) // Out << "__MSALIGN__(" << TD->getABITypeAlignment(VT) << ") "; printTypeName(VectorInnards, VT->getElementType(), false); - return "struct l_vector_" + utostr(VT->getNumElements()) + '_' + CBEMangle(VectorInnards.str()); + return "struct l_vector_" + utostr(VT->getNumElements()) + '_' + + CBEMangle(VectorInnards.str()); } - static const std::string getCmpPredicateName(CmpInst::Predicate P) { switch (P) { - case FCmpInst::FCMP_FALSE: return "0"; - case FCmpInst::FCMP_OEQ: return "oeq"; - case FCmpInst::FCMP_OGT: return "ogt"; - case FCmpInst::FCMP_OGE: return "oge"; - case FCmpInst::FCMP_OLT: return "olt"; - case FCmpInst::FCMP_OLE: return "ole"; - case FCmpInst::FCMP_ONE: return "one"; - case FCmpInst::FCMP_ORD: return "ord"; - case FCmpInst::FCMP_UNO: return "uno"; - case FCmpInst::FCMP_UEQ: return "ueq"; - case FCmpInst::FCMP_UGT: return "ugt"; - case FCmpInst::FCMP_UGE: return "uge"; - case FCmpInst::FCMP_ULT: return "ult"; - case FCmpInst::FCMP_ULE: return "ule"; - case FCmpInst::FCMP_UNE: return "une"; - case FCmpInst::FCMP_TRUE: return "1"; - case ICmpInst::ICMP_EQ: return "eq"; - case ICmpInst::ICMP_NE: return "ne"; - case ICmpInst::ICMP_ULE: return "ule"; - case ICmpInst::ICMP_SLE: return "sle"; - case ICmpInst::ICMP_UGE: return "uge"; - case ICmpInst::ICMP_SGE: return "sge"; - case ICmpInst::ICMP_ULT: return "ult"; - case ICmpInst::ICMP_SLT: return "slt"; - case ICmpInst::ICMP_UGT: return "ugt"; - case ICmpInst::ICMP_SGT: return "sgt"; - default: + case FCmpInst::FCMP_FALSE: + return "0"; + case 
FCmpInst::FCMP_OEQ: + return "oeq"; + case FCmpInst::FCMP_OGT: + return "ogt"; + case FCmpInst::FCMP_OGE: + return "oge"; + case FCmpInst::FCMP_OLT: + return "olt"; + case FCmpInst::FCMP_OLE: + return "ole"; + case FCmpInst::FCMP_ONE: + return "one"; + case FCmpInst::FCMP_ORD: + return "ord"; + case FCmpInst::FCMP_UNO: + return "uno"; + case FCmpInst::FCMP_UEQ: + return "ueq"; + case FCmpInst::FCMP_UGT: + return "ugt"; + case FCmpInst::FCMP_UGE: + return "uge"; + case FCmpInst::FCMP_ULT: + return "ult"; + case FCmpInst::FCMP_ULE: + return "ule"; + case FCmpInst::FCMP_UNE: + return "une"; + case FCmpInst::FCMP_TRUE: + return "1"; + case ICmpInst::ICMP_EQ: + return "eq"; + case ICmpInst::ICMP_NE: + return "ne"; + case ICmpInst::ICMP_ULE: + return "ule"; + case ICmpInst::ICMP_SLE: + return "sle"; + case ICmpInst::ICMP_UGE: + return "uge"; + case ICmpInst::ICMP_SGE: + return "sge"; + case ICmpInst::ICMP_ULT: + return "ult"; + case ICmpInst::ICMP_SLT: + return "slt"; + case ICmpInst::ICMP_UGT: + return "ugt"; + case ICmpInst::ICMP_SGT: + return "sgt"; + default: #ifndef NDEBUG - errs() << "Invalid icmp predicate!" << P; + errs() << "Invalid icmp predicate!" 
<< P; #endif - llvm_unreachable(0); + llvm_unreachable(0); } } - -raw_ostream & -CWriter::printSimpleType(raw_ostream &Out, Type *Ty, bool isSigned) { +raw_ostream &CWriter::printSimpleType(raw_ostream &Out, Type *Ty, + bool isSigned) { assert((Ty->isSingleValueType() || Ty->isVoidTy()) && - "Invalid type for printSimpleType"); + "Invalid type for printSimpleType"); switch (Ty->getTypeID()) { - case Type::VoidTyID: return Out << "void"; - case Type::IntegerTyID: { - unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); - if (NumBits == 1) - return Out << "bool"; - else if (NumBits <= 8) - return Out << (isSigned?"char":"uchar"); - else if (NumBits <= 16) - return Out << (isSigned?"short":"ushort"); - else if (NumBits <= 32) - return Out << (isSigned?"int":"uint"); // !!FIX ME - else if (NumBits <= 64) - return Out << (isSigned?"long":"ulong"); - else { - assert(NumBits <= 128 && "Bit widths > 128 not implemented yet"); - return Out << (isSigned?"int128_t":"uint128_t"); - } - } - case Type::FloatTyID: return Out << "float"; - case Type::DoubleTyID: return Out << "double"; - // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is - // present matches host 'long double'. - case Type::X86_FP80TyID: - case Type::PPC_FP128TyID: - case Type::FP128TyID: return Out << "long double"; - - case Type::X86_MMXTyID: - return Out << (isSigned?"int":"uint") << " __attribute__((vector_size(8)))"; - - default: + case Type::VoidTyID: + return Out << "void"; + case Type::IntegerTyID: { + unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); + if (NumBits == 1) + return Out << "bool"; + else if (NumBits <= 8) + return Out << (isSigned ? "char" : "uchar"); + else if (NumBits <= 16) + return Out << (isSigned ? "short" : "ushort"); + else if (NumBits <= 32) + return Out << (isSigned ? "int" : "uint"); // !!FIX ME + else if (NumBits <= 64) + return Out << (isSigned ? 
"long" : "ulong"); + else { + assert(NumBits <= 128 && "Bit widths > 128 not implemented yet"); + return Out << (isSigned ? "int128_t" : "uint128_t"); + } + } + case Type::FloatTyID: + return Out << "float"; + case Type::DoubleTyID: + return Out << "double"; + // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is + // present matches host 'long double'. + case Type::X86_FP80TyID: + case Type::PPC_FP128TyID: + case Type::FP128TyID: + return Out << "long double"; + + case Type::X86_MMXTyID: + return Out << (isSigned ? "int" : "uint") + << " __attribute__((vector_size(8)))"; + + default: #ifndef NDEBUG - errs() << "Unknown primitive type: " << *Ty << "\n"; + errs() << "Unknown primitive type: " << *Ty << "\n"; #endif - llvm_unreachable(0); + llvm_unreachable(0); } } // Pass the Type* and the variable name and this prints out the variable // declaration. // -raw_ostream &CWriter::printTypeName(raw_ostream &Out, Type *Ty, - bool isSigned, - std::pair<AttributeList, CallingConv::ID> PAL) { +raw_ostream & +CWriter::printTypeName(raw_ostream &Out, Type *Ty, bool isSigned, + std::pair<AttributeList, CallingConv::ID> PAL) { if (Ty->isSingleValueType() || Ty->isVoidTy()) { if (!Ty->isPointerTy() && !Ty->isVectorTy()) @@ -403,39 +437,40 @@ raw_ostream &CWriter::printTypeName(raw_ostream &Out, Type *Ty, return Out << "void"; switch (Ty->getTypeID()) { - case Type::FunctionTyID: { - FunctionType *FTy = cast<FunctionType>(Ty); - return Out << getFunctionName(FTy, PAL); - } - case Type::StructTyID: { - TypedefDeclTypes.insert(Ty); - return Out << getStructName(cast<StructType>(Ty)); - } - - case Type::PointerTyID: { - Type *ElTy = Ty->getPointerElementType(); - return printTypeName(Out, ElTy, false) << '*'; - } - - case Type::ArrayTyID: { - TypedefDeclTypes.insert(Ty); - return Out << getArrayName(cast<ArrayType>(Ty)); - } - - case Type::VectorTyID: { - TypedefDeclTypes.insert(Ty); - return Out << getVectorName(cast<VectorType>(Ty), true); - } + case 
Type::FunctionTyID: { + FunctionType *FTy = cast<FunctionType>(Ty); + return Out << getFunctionName(FTy, PAL); + } + case Type::StructTyID: { + TypedefDeclTypes.insert(Ty); + return Out << getStructName(cast<StructType>(Ty)); + } - default: + case Type::PointerTyID: { + Type *ElTy = Ty->getPointerElementType(); + return printTypeName(Out, ElTy, false) << '*'; + } + + case Type::ArrayTyID: { + TypedefDeclTypes.insert(Ty); + return Out << getArrayName(cast<ArrayType>(Ty)); + } + + case Type::VectorTyID: { + TypedefDeclTypes.insert(Ty); + return Out << getVectorName(cast<VectorType>(Ty), true); + } + + default: #ifndef NDEBUG - errs() << "Unexpected type: " << *Ty << "\n"; + errs() << "Unexpected type: " << *Ty << "\n"; #endif - llvm_unreachable(0); + llvm_unreachable(0); } } -raw_ostream &CWriter::printTypeNameUnaligned(raw_ostream &Out, Type *Ty, bool isSigned) { +raw_ostream &CWriter::printTypeNameUnaligned(raw_ostream &Out, Type *Ty, + bool isSigned) { if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { // MSVC doesn't handle __declspec(align) on parameters, // but we specify it for Vector (hoping the compiler will vectorize it) @@ -446,13 +481,15 @@ raw_ostream &CWriter::printTypeNameUnaligned(raw_ostream &Out, Type *Ty, bool is return printTypeName(Out, Ty, isSigned); } -raw_ostream &CWriter::printStructDeclaration(raw_ostream &Out, StructType *STy) { +raw_ostream &CWriter::printStructDeclaration(raw_ostream &Out, + StructType *STy) { if (STy->isPacked()) Out << "#ifdef _MSC_VER\n#pragma pack(push, 1)\n#endif\n"; Out << getStructName(STy) << " {\n"; unsigned Idx = 0; for (StructType::element_iterator I = STy->element_begin(), - E = STy->element_end(); I != E; ++I, Idx++) { + E = STy->element_end(); + I != E; ++I, Idx++) { Out << " "; bool empty = isEmptyType(*I); if (empty) @@ -472,21 +509,23 @@ raw_ostream &CWriter::printStructDeclaration(raw_ostream &Out, StructType *STy) return Out; } -raw_ostream &CWriter::printFunctionDeclaration(raw_ostream &Out, 
FunctionType *Ty, - std::pair<AttributeList, CallingConv::ID> PAL){ +raw_ostream &CWriter::printFunctionDeclaration( + raw_ostream &Out, FunctionType *Ty, + std::pair<AttributeList, CallingConv::ID> PAL) { Out << "typedef "; printFunctionProto(Out, Ty, PAL, getFunctionName(Ty, PAL), NULL, false); return Out << ";\n"; } -raw_ostream &CWriter::printFunctionProto(raw_ostream &Out, FunctionType *FTy, - std::pair<AttributeList, CallingConv::ID> Attrs, - const std::string &Name, - Function::arg_iterator ArgList, - bool isKernel) { +raw_ostream & +CWriter::printFunctionProto(raw_ostream &Out, FunctionType *FTy, + std::pair<AttributeList, CallingConv::ID> Attrs, + const std::string &Name, + Function::arg_iterator ArgList, bool isKernel) { - // NOTE: AttributeSet is replaced by 'AttributeList' at function level in LLVM-9 + // NOTE: AttributeSet is replaced by 'AttributeList' at function level in + // LLVM-9 AttributeList &PAL = Attrs.first; if (PAL.hasAttribute(AttributeList::FunctionIndex, Attribute::NoReturn)) @@ -497,7 +536,7 @@ raw_ostream &CWriter::printFunctionProto(raw_ostream &Out, FunctionType *FTy, // Should this function actually return a struct by-value? bool isStructReturn = PAL.hasAttribute(1, Attribute::StructRet) || - PAL.hasAttribute(2, Attribute::StructRet); + PAL.hasAttribute(2, Attribute::StructRet); // Get the return type for the function. 
Type *RetTy; if (!isStructReturn) @@ -507,24 +546,25 @@ raw_ostream &CWriter::printFunctionProto(raw_ostream &Out, FunctionType *FTy, RetTy = cast<PointerType>(FTy->getParamType(0))->getElementType(); } printTypeName(Out, RetTy, - /*isSigned=*/PAL.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)); + /*isSigned=*/ + PAL.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)); Out << "/* Processing Function: " << Name << ": " << Attrs.second << "*/\n"; switch (Attrs.second) { - case CallingConv::C: - break; - case CallingConv::X86_StdCall: - Out << " __stdcall"; - break; - case CallingConv::X86_FastCall: - Out << " __fastcall"; - break; - case CallingConv::X86_ThisCall: - Out << " __thiscall"; - break; - default: - // assert(0 && "Encountered Unhandled Calling Convention"); - break; + case CallingConv::C: + break; + case CallingConv::X86_StdCall: + Out << " __stdcall"; + break; + case CallingConv::X86_FastCall: + Out << " __fastcall"; + break; + case CallingConv::X86_ThisCall: + Out << " __thiscall"; + break; + default: + // assert(0 && "Encountered Unhandled Calling Convention"); + break; } Out << ' ' << Name << '('; @@ -532,7 +572,8 @@ raw_ostream &CWriter::printFunctionProto(raw_ostream &Out, FunctionType *FTy, bool PrintedArg = false; FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); - //Function::arg_iterator ArgName = ArgList ? ArgList->begin() : Function::arg_iterator(); + // Function::arg_iterator ArgName = ArgList ? ArgList->begin() : + // Function::arg_iterator(); // NOTE: ArgumentLists not supported in LLVM-9 Function::arg_iterator ArgName = ArgList ? ArgList : Function::arg_iterator(); @@ -543,8 +584,10 @@ raw_ostream &CWriter::printFunctionProto(raw_ostream &Out, FunctionType *FTy, assert(I != E && "Invalid struct return function!"); ++I; ++Idx; - // CHECK: very confused as to how next loop starts from first Function Param? 
- if (ArgList) ++ArgName; + // CHECK: very confused as to how next loop starts from first Function + // Param? + if (ArgList) + ++ArgName; } for (; I != E; ++I) { @@ -559,26 +602,26 @@ raw_ostream &CWriter::printFunctionProto(raw_ostream &Out, FunctionType *FTy, if (PointerType *PTy = dyn_cast<PointerType>(ArgTy)) { unsigned AddrSpace = PTy->getAddressSpace(); DEBUG(errs() << "AddrSpace for " << Idx << " = " << AddrSpace << "\n"); - switch(AddrSpace) { - case GLOBAL_ADDRSPACE: - Out << "__global "; - break; - case SHARED_ADDRSPACE: - Out << "__local "; - break; - case CONSTANT_ADDRSPACE: - Out << "__constant "; - break; - case PRIVATE_ADDRSPACE: - Out << "__private "; - break; - default: - break; + switch (AddrSpace) { + case GLOBAL_ADDRSPACE: + Out << "__global "; + break; + case SHARED_ADDRSPACE: + Out << "__local "; + break; + case CONSTANT_ADDRSPACE: + Out << "__constant "; + break; + case PRIVATE_ADDRSPACE: + Out << "__private "; + break; + default: + break; } } printTypeNameUnaligned(Out, ArgTy, - /*isSigned=*/PAL.hasAttribute(Idx, Attribute::SExt)); + /*isSigned=*/PAL.hasAttribute(Idx, Attribute::SExt)); PrintedArg = true; bool noalias = false; if (PAL.hasAttribute(Idx, Attribute::NoAlias)) { @@ -587,15 +630,16 @@ raw_ostream &CWriter::printFunctionProto(raw_ostream &Out, FunctionType *FTy, ++Idx; if (ArgList) { - Out << ' ' << (noalias ? " restrict " : "") << GetValueName(&*ArgName); + Out << ' ' << (noalias ? 
" restrict " : "") << GetValueName(&*ArgName); ++ArgName; } } if (FTy->isVarArg()) { if (!PrintedArg) { - Out << "int"; //dummy argument for empty vaarg functs - if (ArgList) Out << " vararg_dummy_arg"; + Out << "int"; // dummy argument for empty vaarg functs + if (ArgList) + Out << " vararg_dummy_arg"; } Out << ", ..."; } else if (!PrintedArg) { @@ -615,16 +659,20 @@ raw_ostream &CWriter::printArrayDeclaration(raw_ostream &Out, ArrayType *ATy) { return Out; } -raw_ostream &CWriter::printVectorDeclaration(raw_ostream &Out, VectorType *VTy) { +raw_ostream &CWriter::printVectorDeclaration(raw_ostream &Out, + VectorType *VTy) { assert(!isEmptyType(VTy)); // Vectors are printed like arrays Out << getVectorName(VTy, false) << " {\n "; printTypeName(Out, VTy->getElementType()); - Out << " vector[" << utostr(VTy->getNumElements()) << "];\n} __attribute__((aligned(" << TD->getABITypeAlignment(VTy) << ")));\n"; + Out << " vector[" << utostr(VTy->getNumElements()) + << "];\n} __attribute__((aligned(" << TD->getABITypeAlignment(VTy) + << ")));\n"; return Out; } -void CWriter::printConstantArray(ConstantArray *CPA, enum OperandContext Context) { +void CWriter::printConstantArray(ConstantArray *CPA, + enum OperandContext Context) { printConstant(cast<Constant>(CPA->getOperand(0)), Context); for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) { Out << ", "; @@ -632,7 +680,8 @@ void CWriter::printConstantArray(ConstantArray *CPA, enum OperandContext Context } } -void CWriter::printConstantVector(ConstantVector *CP, enum OperandContext Context) { +void CWriter::printConstantVector(ConstantVector *CP, + enum OperandContext Context) { printConstant(cast<Constant>(CP->getOperand(0)), Context); for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) { Out << ", "; @@ -640,7 +689,8 @@ void CWriter::printConstantVector(ConstantVector *CP, enum OperandContext Contex } } -void CWriter::printConstantDataSequential(ConstantDataSequential *CDS, enum OperandContext Context) { 
+void CWriter::printConstantDataSequential(ConstantDataSequential *CDS, + enum OperandContext Context) { printConstant(CDS->getElementAsConstant(0), Context); for (unsigned i = 1, e = CDS->getNumElements(); i != e; ++i) { Out << ", "; @@ -652,8 +702,10 @@ bool CWriter::printConstantString(Constant *C, enum OperandContext Context) { // As a special case, print the array as a string if it is an array of // ubytes or an array of sbytes with positive values. ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(C); - if (!CDS || !CDS->isCString()) return false; - if (Context != ContextStatic) return false; // TODO + if (!CDS || !CDS->isCString()) + return false; + if (Context != ContextStatic) + return false; // TODO Out << "{ \""; // Keep track of whether the last number was a hexadecimal escape. @@ -680,19 +732,34 @@ bool CWriter::printConstantString(Constant *C, enum OperandContext Context) { } else { LastWasHex = false; switch (C) { - case '\n': Out << "\\n"; break; - case '\t': Out << "\\t"; break; - case '\r': Out << "\\r"; break; - case '\v': Out << "\\v"; break; - case '\a': Out << "\\a"; break; - case '\"': Out << "\\\""; break; - case '\'': Out << "\\\'"; break; - default: - Out << "\\x"; - Out << (char)(( C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A')); - Out << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A')); - LastWasHex = true; - break; + case '\n': + Out << "\\n"; + break; + case '\t': + Out << "\\t"; + break; + case '\r': + Out << "\\r"; + break; + case '\v': + Out << "\\v"; + break; + case '\a': + Out << "\\a"; + break; + case '\"': + Out << "\\\""; + break; + case '\'': + Out << "\\\'"; + break; + default: + Out << "\\x"; + Out << (char)((C / 16 < 10) ? (C / 16 + '0') : (C / 16 - 10 + 'A')); + Out << (char)(((C & 15) < 10) ? 
((C & 15) + '0') + : ((C & 15) - 10 + 'A')); + LastWasHex = true; + break; } } } @@ -700,7 +767,6 @@ bool CWriter::printConstantString(Constant *C, enum OperandContext Context) { return true; } - // isFPCSafeToPrint - Returns true if we may assume that CFP may be written out // textually as a double (rather than as a reference to a stack-allocated // variable). We decide this by converting CFP to a string and back into a @@ -711,7 +777,7 @@ bool CWriter::printConstantString(Constant *C, enum OperandContext Context) { // // TODO copied from CppBackend, new code should use raw_ostream -static inline std::string ftostr(const APFloat& V) { +static inline std::string ftostr(const APFloat &V) { std::string Buf; if (&V.getSemantics() == &APFloat::IEEEdouble()) { raw_string_ostream(Buf) << V.convertToDouble(); @@ -729,14 +795,13 @@ static bool isFPCSafeToPrint(const ConstantFP *CFP) { if (CFP->getType() != Type::getFloatTy(CFP->getContext()) && CFP->getType() != Type::getDoubleTy(CFP->getContext())) return false; - APFloat APF = APFloat(CFP->getValueAPF()); // copy + APFloat APF = APFloat(CFP->getValueAPF()); // copy if (CFP->getType() == Type::getFloatTy(CFP->getContext())) APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &ignored); #if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A char Buffer[100]; sprintf(Buffer, "%a", APF.convertToDouble()); - if (!strncmp(Buffer, "0x", 2) || - !strncmp(Buffer, "-0x", 3) || + if (!strncmp(Buffer, "0x", 2) || !strncmp(Buffer, "-0x", 3) || !strncmp(Buffer, "+0x", 3)) return APF.bitwiseIsEqual(APFloat(atof(Buffer))); return false; @@ -763,211 +828,249 @@ static bool isFPCSafeToPrint(const ConstantFP *CFP) { void CWriter::printCast(unsigned opc, Type *SrcTy, Type *DstTy) { // Print the destination type cast switch (opc) { - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::IntToPtr: - case Instruction::Trunc: - case Instruction::BitCast: - case Instruction::FPExt: - case Instruction::FPTrunc: // For these the 
DstTy sign doesn't matter - Out << '('; - printTypeName(Out, DstTy); - Out << ')'; - break; - case Instruction::ZExt: - case Instruction::PtrToInt: - case Instruction::FPToUI: // For these, make sure we get an unsigned dest - Out << '('; - printSimpleType(Out, DstTy, false); - Out << ')'; - break; - case Instruction::SExt: - case Instruction::FPToSI: // For these, make sure we get a signed dest - Out << '('; - printSimpleType(Out, DstTy, true); - Out << ')'; - break; - default: - llvm_unreachable("Invalid cast opcode"); + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::IntToPtr: + case Instruction::Trunc: + case Instruction::BitCast: + case Instruction::FPExt: + case Instruction::FPTrunc: // For these the DstTy sign doesn't matter + Out << '('; + printTypeName(Out, DstTy); + Out << ')'; + break; + case Instruction::ZExt: + case Instruction::PtrToInt: + case Instruction::FPToUI: // For these, make sure we get an unsigned dest + Out << '('; + printSimpleType(Out, DstTy, false); + Out << ')'; + break; + case Instruction::SExt: + case Instruction::FPToSI: // For these, make sure we get a signed dest + Out << '('; + printSimpleType(Out, DstTy, true); + Out << ')'; + break; + default: + llvm_unreachable("Invalid cast opcode"); } // Print the source type cast switch (opc) { - case Instruction::UIToFP: - case Instruction::ZExt: - Out << '('; - printSimpleType(Out, SrcTy, false); - Out << ')'; - break; - case Instruction::SIToFP: - case Instruction::SExt: - Out << '('; - printSimpleType(Out, SrcTy, true); - Out << ')'; - break; - case Instruction::IntToPtr: - case Instruction::PtrToInt: - // Avoid "cast to pointer from integer of different size" warnings - Out << "(uintptr_t)"; - break; - case Instruction::Trunc: - case Instruction::BitCast: - case Instruction::FPExt: - case Instruction::FPTrunc: - case Instruction::FPToSI: - case Instruction::FPToUI: - break; // These don't need a source cast. 
- default: - llvm_unreachable("Invalid cast opcode"); + case Instruction::UIToFP: + case Instruction::ZExt: + Out << '('; + printSimpleType(Out, SrcTy, false); + Out << ')'; + break; + case Instruction::SIToFP: + case Instruction::SExt: + Out << '('; + printSimpleType(Out, SrcTy, true); + Out << ')'; + break; + case Instruction::IntToPtr: + case Instruction::PtrToInt: + // Avoid "cast to pointer from integer of different size" warnings + Out << "(uintptr_t)"; + break; + case Instruction::Trunc: + case Instruction::BitCast: + case Instruction::FPExt: + case Instruction::FPTrunc: + case Instruction::FPToSI: + case Instruction::FPToUI: + break; // These don't need a source cast. + default: + llvm_unreachable("Invalid cast opcode"); } } // printConstant - The LLVM Constant to C Constant converter. void CWriter::printConstant(Constant *CPV, enum OperandContext Context) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) { - assert(CE->getType()->isIntegerTy() || CE->getType()->isFloatingPointTy() || CE->getType()->isPointerTy()); // TODO: VectorType are valid here, but not supported + assert(CE->getType()->isIntegerTy() || CE->getType()->isFloatingPointTy() || + CE->getType()->isPointerTy()); // TODO: VectorType are valid here, + // but not supported GetElementPtrInst *GEPI; switch (CE->getOpcode()) { - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::BitCast: - Out << "("; - printCast(CE->getOpcode(), CE->getOperand(0)->getType(), CE->getType()); - if (CE->getOpcode() == Instruction::SExt && - CE->getOperand(0)->getType() == Type::getInt1Ty(CPV->getContext())) { - // Make sure we really sext from bool here by subtracting from 0 - Out << "0-"; - } - printConstant(CE->getOperand(0), ContextCasted); 
- if (CE->getType() == Type::getInt1Ty(CPV->getContext()) && - (CE->getOpcode() == Instruction::Trunc || - CE->getOpcode() == Instruction::FPToUI || - CE->getOpcode() == Instruction::FPToSI || - CE->getOpcode() == Instruction::PtrToInt)) { - // Make sure we really truncate to bool here by anding with 1 - Out << "&1u"; - } - Out << ')'; - return; + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: + Out << "("; + printCast(CE->getOpcode(), CE->getOperand(0)->getType(), CE->getType()); + if (CE->getOpcode() == Instruction::SExt && + CE->getOperand(0)->getType() == Type::getInt1Ty(CPV->getContext())) { + // Make sure we really sext from bool here by subtracting from 0 + Out << "0-"; + } + printConstant(CE->getOperand(0), ContextCasted); + if (CE->getType() == Type::getInt1Ty(CPV->getContext()) && + (CE->getOpcode() == Instruction::Trunc || + CE->getOpcode() == Instruction::FPToUI || + CE->getOpcode() == Instruction::FPToSI || + CE->getOpcode() == Instruction::PtrToInt)) { + // Make sure we really truncate to bool here by anding with 1 + Out << "&1u"; + } + Out << ')'; + return; - case Instruction::GetElementPtr: - Out << "("; - DEBUG(errs() << "\n----------\nCE: " << *CE << "\n"); - GEPI = dyn_cast<GetElementPtrInst>(CE->getAsInstruction()); - DEBUG(errs() << "GEPI: " << *GEPI << "\n"); - printGEPExpression(CE->getOperand(0), gep_type_begin(CPV), gep_type_end(CPV), CE->getOperand(0)->getType()->isArrayTy(), GEPI); - delete(GEPI); - DEBUG(errs() << "Deleted GEPI!\n"); - Out << ")"; - return; - case Instruction::Select: - Out << '('; - printConstant(CE->getOperand(0), ContextCasted); - Out << '?'; - printConstant(CE->getOperand(1), ContextNormal); - Out << ':'; - 
printConstant(CE->getOperand(2), ContextNormal); - Out << ')'; - return; + case Instruction::GetElementPtr: + Out << "("; + DEBUG(errs() << "\n----------\nCE: " << *CE << "\n"); + GEPI = dyn_cast<GetElementPtrInst>(CE->getAsInstruction()); + DEBUG(errs() << "GEPI: " << *GEPI << "\n"); + printGEPExpression(CE->getOperand(0), gep_type_begin(CPV), + gep_type_end(CPV), + CE->getOperand(0)->getType()->isArrayTy(), GEPI); + delete (GEPI); + DEBUG(errs() << "Deleted GEPI!\n"); + Out << ")"; + return; + case Instruction::Select: + Out << '('; + printConstant(CE->getOperand(0), ContextCasted); + Out << '?'; + printConstant(CE->getOperand(1), ContextNormal); + Out << ':'; + printConstant(CE->getOperand(2), ContextNormal); + Out << ')'; + return; + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::ICmp: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: { + Out << '('; + bool NeedsClosingParens = printConstExprCast(CE); + printConstantWithCast(CE->getOperand(0), CE->getOpcode()); + switch (CE->getOpcode()) { case Instruction::Add: case Instruction::FAdd: + Out << " + "; + break; case Instruction::Sub: case Instruction::FSub: + Out << " - "; + break; case Instruction::Mul: case Instruction::FMul: - case Instruction::SDiv: - case Instruction::UDiv: - case Instruction::FDiv: + Out << " * "; + break; case Instruction::URem: case Instruction::SRem: case Instruction::FRem: + Out << " % "; + break; + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + Out << " / "; + break; case Instruction::And: + Out << " & "; + break; case Instruction::Or: + Out << " | "; + break; case 
Instruction::Xor: - case Instruction::ICmp: + Out << " ^ "; + break; case Instruction::Shl: + Out << " << "; + break; case Instruction::LShr: case Instruction::AShr: - { - Out << '('; - bool NeedsClosingParens = printConstExprCast(CE); - printConstantWithCast(CE->getOperand(0), CE->getOpcode()); - switch (CE->getOpcode()) { - case Instruction::Add: - case Instruction::FAdd: Out << " + "; break; - case Instruction::Sub: - case Instruction::FSub: Out << " - "; break; - case Instruction::Mul: - case Instruction::FMul: Out << " * "; break; - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: Out << " % "; break; - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: Out << " / "; break; - case Instruction::And: Out << " & "; break; - case Instruction::Or: Out << " | "; break; - case Instruction::Xor: Out << " ^ "; break; - case Instruction::Shl: Out << " << "; break; - case Instruction::LShr: - case Instruction::AShr: Out << " >> "; break; - case Instruction::ICmp: - switch (CE->getPredicate()) { - case ICmpInst::ICMP_EQ: Out << " == "; break; - case ICmpInst::ICMP_NE: Out << " != "; break; - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_ULT: Out << " < "; break; - case ICmpInst::ICMP_SLE: - case ICmpInst::ICMP_ULE: Out << " <= "; break; - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_UGT: Out << " > "; break; - case ICmpInst::ICMP_SGE: - case ICmpInst::ICMP_UGE: Out << " >= "; break; - default: llvm_unreachable("Illegal ICmp predicate"); - } - break; - default: llvm_unreachable("Illegal opcode here!"); - } - printConstantWithCast(CE->getOperand(1), CE->getOpcode()); - if (NeedsClosingParens) - Out << "))"; - Out << ')'; - return; + Out << " >> "; + break; + case Instruction::ICmp: + switch (CE->getPredicate()) { + case ICmpInst::ICMP_EQ: + Out << " == "; + break; + case ICmpInst::ICMP_NE: + Out << " != "; + break; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_ULT: + Out << " < "; + break; + case ICmpInst::ICMP_SLE: 
+ case ICmpInst::ICMP_ULE: + Out << " <= "; + break; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_UGT: + Out << " > "; + break; + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_UGE: + Out << " >= "; + break; + default: + llvm_unreachable("Illegal ICmp predicate"); } - case Instruction::FCmp: { - Out << '('; - bool NeedsClosingParens = printConstExprCast(CE); - if (CE->getPredicate() == FCmpInst::FCMP_FALSE) - Out << "0"; - else if (CE->getPredicate() == FCmpInst::FCMP_TRUE) - Out << "1"; - else { - Out << "llvm_fcmp_" << getCmpPredicateName((CmpInst::Predicate)CE->getPredicate()) << "("; - printConstant(CE->getOperand(0), ContextCasted); - Out << ", "; - printConstant(CE->getOperand(1), ContextCasted); - Out << ")"; - } - if (NeedsClosingParens) - Out << "))"; - Out << ')'; - return; - } + break; default: + llvm_unreachable("Illegal opcode here!"); + } + printConstantWithCast(CE->getOperand(1), CE->getOpcode()); + if (NeedsClosingParens) + Out << "))"; + Out << ')'; + return; + } + case Instruction::FCmp: { + Out << '('; + bool NeedsClosingParens = printConstExprCast(CE); + if (CE->getPredicate() == FCmpInst::FCMP_FALSE) + Out << "0"; + else if (CE->getPredicate() == FCmpInst::FCMP_TRUE) + Out << "1"; + else { + Out << "llvm_fcmp_" + << getCmpPredicateName((CmpInst::Predicate)CE->getPredicate()) + << "("; + printConstant(CE->getOperand(0), ContextCasted); + Out << ", "; + printConstant(CE->getOperand(1), ContextCasted); + Out << ")"; + } + if (NeedsClosingParens) + Out << "))"; + Out << ')'; + return; + } + default: #ifndef NDEBUG - errs() << "CWriter Error: Unhandled constant expression: " - << *CE << "\n"; + errs() << "CWriter Error: Unhandled constant expression: " << *CE << "\n"; #endif - llvm_unreachable(0); + llvm_unreachable(0); } } else if (isa<UndefValue>(CPV) && CPV->getType()->isSingleValueType()) { if (CPV->getType()->isVectorTy()) { @@ -984,7 +1087,8 @@ void CWriter::printConstant(Constant *CPV, enum OperandContext Context) { Constant *Zero = 
Constant::getNullValue(VT->getElementType()); unsigned NumElts = VT->getNumElements(); for (unsigned i = 0; i != NumElts; ++i) { - if (i) Out << ", "; + if (i) + Out << ", "; printConstant(Zero, ContextCasted); } Out << ")"; @@ -998,9 +1102,10 @@ void CWriter::printConstant(Constant *CPV, enum OperandContext Context) { } if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { - Type* Ty = CI->getType(); + Type *Ty = CI->getType(); unsigned ActiveBits = CI->getValue().getMinSignedBits(); - DEBUG(errs() << "Here: " << *CI << ", " << *Ty << ", " << ActiveBits << "\n"); + DEBUG(errs() << "Here: " << *CI << ", " << *Ty << ", " << ActiveBits + << "\n"); Out << CI->getSExtValue(); // if (Ty == Type::getInt1Ty(CPV->getContext())) { // Out << (CI->getZExtValue() ? '1' : '0'); @@ -1013,7 +1118,8 @@ void CWriter::printConstant(Constant *CPV, enum OperandContext Context) { // Out << CI->getSExtValue(); // most likely a shorter representation //// if (ActiveBits >= 32) //// Out << ")"; - // } else if (Ty->getPrimitiveSizeInBits() < 32 && Context == ContextNormal) { + // } else if (Ty->getPrimitiveSizeInBits() < 32 && Context == + // ContextNormal) { // Out << "(("; // printSimpleType(Out, Ty, false) << ')'; // if (CI->isMinValue(true)) @@ -1030,248 +1136,266 @@ void CWriter::printConstant(Constant *CPV, enum OperandContext Context) { //// const APInt &V = CI->getValue(); //// const APInt &Vlo = V.getLoBits(64); //// const APInt &Vhi = V.getHiBits(64); - //// Out << (Context == ContextStatic ? "UINT128_C" : "llvm_ctor_u128"); - //// Out << "(UINT64_C(" << Vhi.getZExtValue() << "), UINT64_C(" << Vlo.getZExtValue() << "))"; + //// Out << (Context == ContextStatic ? 
"UINT128_C" : + ///"llvm_ctor_u128"); / Out << "(UINT64_C(" << Vhi.getZExtValue() << + ///"), UINT64_C(" << Vlo.getZExtValue() << "))"; // } return; } switch (CPV->getType()->getTypeID()) { - case Type::FloatTyID: - case Type::DoubleTyID: - case Type::X86_FP80TyID: - case Type::PPC_FP128TyID: - case Type::FP128TyID: { - ConstantFP *FPC = cast<ConstantFP>(CPV); - std::map<const ConstantFP*, unsigned>::iterator I = FPConstantMap.find(FPC); - if (I != FPConstantMap.end()) { - // Because of FP precision problems we must load from a stack allocated - // value that holds the value in hex. - Out << "(*(" << (FPC->getType() == Type::getFloatTy(CPV->getContext()) ? - "float" : - FPC->getType() == Type::getDoubleTy(CPV->getContext()) ? - "double" : - "long double") - << "*)&FPConstant" << I->second << ')'; - } else { - double V; - if (FPC->getType() == Type::getFloatTy(CPV->getContext())) - V = FPC->getValueAPF().convertToFloat(); - else if (FPC->getType() == Type::getDoubleTy(CPV->getContext())) - V = FPC->getValueAPF().convertToDouble(); - else { - // Long double. Convert the number to double, discarding precision. - // This is not awesome, but it at least makes the CBE output somewhat - // useful. - APFloat Tmp = FPC->getValueAPF(); - bool LosesInfo; - Tmp.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo); - V = Tmp.convertToDouble(); - } - - if (std::isnan(V)) { - // The value is NaN - - // FIXME the actual NaN bits should be emitted. - // The prefix for a quiet NaN is 0x7FF8. For a signalling NaN, - // it's 0x7ff4. 
- const unsigned long QuietNaN = 0x7ff8UL; - //const unsigned long SignalNaN = 0x7ff4UL; - - // We need to grab the first part of the FP # - char Buffer[100]; - - uint64_t ll = DoubleToBits(V); - sprintf(Buffer, "0x%llx", static_cast<long long>(ll)); - - std::string Num(&Buffer[0], &Buffer[6]); - unsigned long Val = strtoul(Num.c_str(), 0, 16); - - if (FPC->getType() == Type::getFloatTy(FPC->getContext())) - Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "F(\"" - << Buffer << "\") /*nan*/ "; - else - Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "(\"" - << Buffer << "\") /*nan*/ "; - } else if (std::isinf(V)) { - // The value is Inf - if (V < 0) Out << '-'; - Out << "LLVM_INF" << - (FPC->getType() == Type::getFloatTy(FPC->getContext()) ? "F" : "") - << " /*inf*/ "; - } else { - std::string Num; + case Type::FloatTyID: + case Type::DoubleTyID: + case Type::X86_FP80TyID: + case Type::PPC_FP128TyID: + case Type::FP128TyID: { + ConstantFP *FPC = cast<ConstantFP>(CPV); + std::map<const ConstantFP *, unsigned>::iterator I = + FPConstantMap.find(FPC); + if (I != FPConstantMap.end()) { + // Because of FP precision problems we must load from a stack allocated + // value that holds the value in hex. + Out << "(*(" + << (FPC->getType() == Type::getFloatTy(CPV->getContext()) + ? "float" + : FPC->getType() == Type::getDoubleTy(CPV->getContext()) + ? "double" + : "long double") + << "*)&FPConstant" << I->second << ')'; + } else { + double V; + if (FPC->getType() == Type::getFloatTy(CPV->getContext())) + V = FPC->getValueAPF().convertToFloat(); + else if (FPC->getType() == Type::getDoubleTy(CPV->getContext())) + V = FPC->getValueAPF().convertToDouble(); + else { + // Long double. Convert the number to double, discarding precision. + // This is not awesome, but it at least makes the CBE output somewhat + // useful. 
+ APFloat Tmp = FPC->getValueAPF(); + bool LosesInfo; + Tmp.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo); + V = Tmp.convertToDouble(); + } + + if (std::isnan(V)) { + // The value is NaN + + // FIXME the actual NaN bits should be emitted. + // The prefix for a quiet NaN is 0x7FF8. For a signalling NaN, + // it's 0x7ff4. + const unsigned long QuietNaN = 0x7ff8UL; + // const unsigned long SignalNaN = 0x7ff4UL; + + // We need to grab the first part of the FP # + char Buffer[100]; + + uint64_t ll = DoubleToBits(V); + sprintf(Buffer, "0x%llx", static_cast<long long>(ll)); + + std::string Num(&Buffer[0], &Buffer[6]); + unsigned long Val = strtoul(Num.c_str(), 0, 16); + + if (FPC->getType() == Type::getFloatTy(FPC->getContext())) + Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "F(\"" << Buffer + << "\") /*nan*/ "; + else + Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "(\"" << Buffer + << "\") /*nan*/ "; + } else if (std::isinf(V)) { + // The value is Inf + if (V < 0) + Out << '-'; + Out << "LLVM_INF" + << (FPC->getType() == Type::getFloatTy(FPC->getContext()) ? "F" + : "") + << " /*inf*/ "; + } else { + std::string Num; #if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A - // Print out the constant as a floating point number. - char Buffer[100]; - sprintf(Buffer, "%a", V); - Num = Buffer; + // Print out the constant as a floating point number. + char Buffer[100]; + sprintf(Buffer, "%a", V); + Num = Buffer; #else - Num = ftostr(FPC->getValueAPF()); + Num = ftostr(FPC->getValueAPF()); #endif - Out << Num; - } - } - break; - } - - case Type::ArrayTyID: { - if (printConstantString(CPV, Context)) break; - ArrayType *AT = cast<ArrayType>(CPV->getType()); - assert(AT->getNumElements() != 0 && !isEmptyType(AT)); - if (Context != ContextStatic) { - CtorDeclTypes.insert(AT); - Out << "llvm_ctor_"; - printTypeString(Out, AT, false); - Out << "("; - Context = ContextCasted; - } else { - Out << "{ { "; // Arrays are wrapped in struct types. 
- } - if (ConstantArray *CA = dyn_cast<ConstantArray>(CPV)) { - printConstantArray(CA, Context); - } else if (ConstantDataSequential *CDS = - dyn_cast<ConstantDataSequential>(CPV)) { - printConstantDataSequential(CDS, Context); - } else { - assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)); - Constant *CZ = Constant::getNullValue(AT->getElementType()); - printConstant(CZ, Context); - for (unsigned i = 1, e = AT->getNumElements(); i != e; ++i) { - Out << ", "; - printConstant(CZ, Context); - } - } - Out << (Context == ContextStatic ? " } }" : ")"); // Arrays are wrapped in struct types. - break; - } - - case Type::VectorTyID: { - VectorType *VT = cast<VectorType>(CPV->getType()); - assert(VT->getNumElements() != 0 && !isEmptyType(VT)); - if (Context != ContextStatic) { - CtorDeclTypes.insert(VT); - Out << "llvm_ctor_"; - printTypeString(Out, VT, false); - Out << "("; - Context = ContextCasted; - } else { - Out << "{ "; - } - if (ConstantVector *CV = dyn_cast<ConstantVector>(CPV)) { - printConstantVector(CV, Context); - } else if (ConstantDataSequential *CDS = - dyn_cast<ConstantDataSequential>(CPV)) { - printConstantDataSequential(CDS, Context); - } else { - assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)); - Constant *CZ = Constant::getNullValue(VT->getElementType()); - printConstant(CZ, Context); - for (unsigned i = 1, e = VT->getNumElements(); i != e; ++i) { - Out << ", "; - printConstant(CZ, Context); - } - } - Out << (Context == ContextStatic ? 
" }" : ")"); - break; - } - - case Type::StructTyID: { - StructType *ST = cast<StructType>(CPV->getType()); - assert(!isEmptyType(ST)); - if (Context != ContextStatic) { - CtorDeclTypes.insert(ST); - Out << "llvm_ctor_"; - printTypeString(Out, ST, false); - Out << "("; - Context = ContextCasted; - } else { - Out << "{ "; - } - - if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) { - bool printed = false; - for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { - Type *ElTy = ST->getElementType(i); - if (isEmptyType(ElTy)) continue; - if (printed) Out << ", "; - printConstant(Constant::getNullValue(ElTy), Context); - printed = true; - } - assert(printed); - } else { - bool printed = false; - for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { - Constant *C = cast<Constant>(CPV->getOperand(i)); - if (isEmptyType(C->getType())) continue; - if (printed) Out << ", "; - printConstant(C, Context); - printed = true; - } - assert(printed); - } - Out << (Context == ContextStatic ? " }" : ")"); - break; - } - - case Type::PointerTyID: - if (isa<ConstantPointerNull>(CPV)) { - Out << "(("; - printTypeName(Out, CPV->getType()); // sign doesn't matter - Out << ")/*NULL*/0)"; - break; - } else if (GlobalValue *GV = dyn_cast<GlobalValue>(CPV)) { - writeOperand(GV); - break; - } - // FALL THROUGH - default: + Out << Num; + } + } + break; + } + + case Type::ArrayTyID: { + if (printConstantString(CPV, Context)) + break; + ArrayType *AT = cast<ArrayType>(CPV->getType()); + assert(AT->getNumElements() != 0 && !isEmptyType(AT)); + if (Context != ContextStatic) { + CtorDeclTypes.insert(AT); + Out << "llvm_ctor_"; + printTypeString(Out, AT, false); + Out << "("; + Context = ContextCasted; + } else { + Out << "{ { "; // Arrays are wrapped in struct types. 
+ } + if (ConstantArray *CA = dyn_cast<ConstantArray>(CPV)) { + printConstantArray(CA, Context); + } else if (ConstantDataSequential *CDS = + dyn_cast<ConstantDataSequential>(CPV)) { + printConstantDataSequential(CDS, Context); + } else { + assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)); + Constant *CZ = Constant::getNullValue(AT->getElementType()); + printConstant(CZ, Context); + for (unsigned i = 1, e = AT->getNumElements(); i != e; ++i) { + Out << ", "; + printConstant(CZ, Context); + } + } + Out << (Context == ContextStatic + ? " } }" + : ")"); // Arrays are wrapped in struct types. + break; + } + + case Type::VectorTyID: { + VectorType *VT = cast<VectorType>(CPV->getType()); + assert(VT->getNumElements() != 0 && !isEmptyType(VT)); + if (Context != ContextStatic) { + CtorDeclTypes.insert(VT); + Out << "llvm_ctor_"; + printTypeString(Out, VT, false); + Out << "("; + Context = ContextCasted; + } else { + Out << "{ "; + } + if (ConstantVector *CV = dyn_cast<ConstantVector>(CPV)) { + printConstantVector(CV, Context); + } else if (ConstantDataSequential *CDS = + dyn_cast<ConstantDataSequential>(CPV)) { + printConstantDataSequential(CDS, Context); + } else { + assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)); + Constant *CZ = Constant::getNullValue(VT->getElementType()); + printConstant(CZ, Context); + for (unsigned i = 1, e = VT->getNumElements(); i != e; ++i) { + Out << ", "; + printConstant(CZ, Context); + } + } + Out << (Context == ContextStatic ? 
" }" : ")"); + break; + } + + case Type::StructTyID: { + StructType *ST = cast<StructType>(CPV->getType()); + assert(!isEmptyType(ST)); + if (Context != ContextStatic) { + CtorDeclTypes.insert(ST); + Out << "llvm_ctor_"; + printTypeString(Out, ST, false); + Out << "("; + Context = ContextCasted; + } else { + Out << "{ "; + } + + if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) { + bool printed = false; + for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { + Type *ElTy = ST->getElementType(i); + if (isEmptyType(ElTy)) + continue; + if (printed) + Out << ", "; + printConstant(Constant::getNullValue(ElTy), Context); + printed = true; + } + assert(printed); + } else { + bool printed = false; + for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { + Constant *C = cast<Constant>(CPV->getOperand(i)); + if (isEmptyType(C->getType())) + continue; + if (printed) + Out << ", "; + printConstant(C, Context); + printed = true; + } + assert(printed); + } + Out << (Context == ContextStatic ? " }" : ")"); + break; + } + + case Type::PointerTyID: + if (isa<ConstantPointerNull>(CPV)) { + Out << "(("; + printTypeName(Out, CPV->getType()); // sign doesn't matter + Out << ")/*NULL*/0)"; + break; + } else if (GlobalValue *GV = dyn_cast<GlobalValue>(CPV)) { + writeOperand(GV); + break; + } + // FALL THROUGH + default: #ifndef NDEBUG - errs() << "Unknown constant type: " << *CPV << "\n"; + errs() << "Unknown constant type: " << *CPV << "\n"; #endif - llvm_unreachable(0); + llvm_unreachable(0); } } // Some constant expressions need to be casted back to the original types // because their operands were casted to the expected type. This function takes // care of detecting that case and printing the cast for the ConstantExpr. 
-bool CWriter::printConstExprCast(ConstantExpr* CE) { +bool CWriter::printConstExprCast(ConstantExpr *CE) { bool NeedsExplicitCast = false; Type *Ty = CE->getOperand(0)->getType(); bool TypeIsSigned = false; switch (CE->getOpcode()) { - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - // We need to cast integer arithmetic so that it is always performed - // as unsigned, to avoid undefined behavior on overflow. - case Instruction::LShr: - case Instruction::URem: - case Instruction::UDiv: NeedsExplicitCast = true; break; - case Instruction::AShr: - case Instruction::SRem: - case Instruction::SDiv: NeedsExplicitCast = true; TypeIsSigned = true; break; - case Instruction::SExt: - Ty = CE->getType(); - NeedsExplicitCast = true; - TypeIsSigned = true; - break; - case Instruction::ZExt: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::BitCast: - Ty = CE->getType(); - NeedsExplicitCast = true; - break; - default: break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + // We need to cast integer arithmetic so that it is always performed + // as unsigned, to avoid undefined behavior on overflow. 
+ case Instruction::LShr: + case Instruction::URem: + case Instruction::UDiv: + NeedsExplicitCast = true; + break; + case Instruction::AShr: + case Instruction::SRem: + case Instruction::SDiv: + NeedsExplicitCast = true; + TypeIsSigned = true; + break; + case Instruction::SExt: + Ty = CE->getType(); + NeedsExplicitCast = true; + TypeIsSigned = true; + break; + case Instruction::ZExt: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: + Ty = CE->getType(); + NeedsExplicitCast = true; + break; + default: + break; } if (NeedsExplicitCast) { Out << "(("; @@ -1284,11 +1408,13 @@ bool CWriter::printConstExprCast(ConstantExpr* CE) { // Print a constant assuming that it is the operand for a given Opcode. The // opcodes that care about sign need to cast their operands to the expected // type before the operation proceeds. This function does the casting. -void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) { +void CWriter::printConstantWithCast(Constant *CPV, unsigned Opcode) { // Extract the operand's type, we'll need it. - Type* OpTy = CPV->getType(); - assert(OpTy->isIntegerTy() || OpTy->isFloatingPointTy()); // TODO: VectorType are valid here, but not supported + Type *OpTy = CPV->getType(); + assert(OpTy->isIntegerTy() || + OpTy->isFloatingPointTy()); // TODO: VectorType are valid here, but not + // supported // Indicate whether to do the cast or not. 
bool shouldCast; @@ -1331,8 +1457,7 @@ std::string CWriter::GetValueName(Value *Operand) { std::string VarName; VarName.reserve(Name.capacity()); - for (std::string::iterator I = Name.begin(), E = Name.end(); - I != E; ++I) { + for (std::string::iterator I = Name.begin(), E = Name.end(); I != E; ++I) { unsigned char ch = *I; if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || @@ -1356,7 +1481,7 @@ void CWriter::writeInstComputationInline(Instruction &I) { unsigned mask = 0; Type *Ty = I.getType(); if (Ty->isIntegerTy()) { - IntegerType *ITy = static_cast<IntegerType*>(Ty); + IntegerType *ITy = static_cast<IntegerType *>(Ty); if (!ITy->isPowerOf2ByteWidth()) mask = ITy->getBitMask(); } @@ -1374,20 +1499,21 @@ void CWriter::writeInstComputationInline(Instruction &I) { Out << ")&" << mask << ")"; } - -void CWriter::writeOperandInternal(Value *Operand, enum OperandContext Context) { - DEBUG(errs() << "In write operand internal: " << *Operand << "\n"); +void CWriter::writeOperandInternal(Value *Operand, + enum OperandContext Context) { + DEBUG(errs() << "In write operand internal: " << *Operand << "\n"); if (Instruction *I = dyn_cast<Instruction>(Operand)) // Should we inline this instruction to build a tree? 
if (isInlinableInst(*I) && !isDirectAlloca(I)) { - DEBUG(errs() << "isInlinableInst & NOT isDirectAlloca\n" << "\n"); + DEBUG(errs() << "isInlinableInst & NOT isDirectAlloca\n" + << "\n"); Out << '('; writeInstComputationInline(*I); Out << ')'; return; } - Constant* CPV = dyn_cast<Constant>(Operand); + Constant *CPV = dyn_cast<Constant>(Operand); if (CPV && !isa<GlobalValue>(CPV)) printConstant(CPV, Context); @@ -1395,12 +1521,14 @@ void CWriter::writeOperandInternal(Value *Operand, enum OperandContext Context) Out << GetValueName(Operand); } -void CWriter::writeOperand(Value *Operand, enum OperandContext Context, bool arrayAccess) { - DEBUG(errs() << "In write operand: " << *Operand << "; ArrayAccess = " << arrayAccess << "\n"); +void CWriter::writeOperand(Value *Operand, enum OperandContext Context, + bool arrayAccess) { + DEBUG(errs() << "In write operand: " << *Operand + << "; ArrayAccess = " << arrayAccess << "\n"); bool isAddressImplicit = isAddressExposed(Operand); if (isAddressImplicit && !arrayAccess) { DEBUG(errs() << "isAddressImplicit & NOT arrayAccess!\n"); - Out << "(&"; // Global variables are referenced as their addresses by llvm + Out << "(&"; // Global variables are referenced as their addresses by llvm } writeOperandInternal(Operand, Context); @@ -1429,26 +1557,27 @@ void CWriter::writeOperandDeref(Value *Operand) { bool CWriter::writeInstructionCast(Instruction &I) { Type *Ty = I.getOperand(0)->getType(); switch (I.getOpcode()) { - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - // We need to cast integer arithmetic so that it is always performed - // as unsigned, to avoid undefined behavior on overflow. 
- case Instruction::LShr: - case Instruction::URem: - case Instruction::UDiv: - Out << "(("; - printSimpleType(Out, Ty, false); - Out << ")("; - return true; - case Instruction::AShr: - case Instruction::SRem: - case Instruction::SDiv: - Out << "(("; - printSimpleType(Out, Ty, true); - Out << ")("; - return true; - default: break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + // We need to cast integer arithmetic so that it is always performed + // as unsigned, to avoid undefined behavior on overflow. + case Instruction::LShr: + case Instruction::URem: + case Instruction::UDiv: + Out << "(("; + printSimpleType(Out, Ty, false); + Out << ")("; + return true; + case Instruction::AShr: + case Instruction::SRem: + case Instruction::SDiv: + Out << "(("; + printSimpleType(Out, Ty, true); + Out << ")("; + return true; + default: + break; } return false; } @@ -1456,7 +1585,8 @@ bool CWriter::writeInstructionCast(Instruction &I) { // Write the operand with a cast to another type based on the Opcode being used. // This will be used in cases where an instruction has specific type // requirements (usually signedness) for its operands. -void CWriter::opcodeNeedsCast(unsigned Opcode, +void CWriter::opcodeNeedsCast( + unsigned Opcode, // Indicate whether to do the cast or not. bool &shouldCast, // Indicate whether the cast should be to a signed type or not. @@ -1466,33 +1596,33 @@ void CWriter::opcodeNeedsCast(unsigned Opcode, // the new type to which the operand should be casted by setting the value // of OpTy. If we change OpTy, also set shouldCast to true. switch (Opcode) { - default: - // for most instructions, it doesn't matter - shouldCast = false; - castIsSigned = false; - break; - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - // We need to cast integer arithmetic so that it is always performed - // as unsigned, to avoid undefined behavior on overflow. 
- case Instruction::LShr: - case Instruction::UDiv: - case Instruction::URem: // Cast to unsigned first - shouldCast = true; - castIsSigned = false; - break; - case Instruction::GetElementPtr: - case Instruction::AShr: - case Instruction::SDiv: - case Instruction::SRem: // Cast to signed first - shouldCast = true; - castIsSigned = true; - break; + default: + // for most instructions, it doesn't matter + shouldCast = false; + castIsSigned = false; + break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + // We need to cast integer arithmetic so that it is always performed + // as unsigned, to avoid undefined behavior on overflow. + case Instruction::LShr: + case Instruction::UDiv: + case Instruction::URem: // Cast to unsigned first + shouldCast = true; + castIsSigned = false; + break; + case Instruction::GetElementPtr: + case Instruction::AShr: + case Instruction::SDiv: + case Instruction::SRem: // Cast to signed first + shouldCast = true; + castIsSigned = true; + break; } } -void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) { +void CWriter::writeOperandWithCast(Value *Operand, unsigned Opcode) { DEBUG(errs() << "Here: " << *Operand << "\n"); // Write out the casted operand if we should, otherwise just write the // operand. @@ -1510,12 +1640,12 @@ void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) { // writeOperand(Operand, ContextCasted); // Out << ")"; // } else - writeOperand(Operand, ContextNormal/*ContextCasted*/); + writeOperand(Operand, ContextNormal /*ContextCasted*/); } // Write the operand with a cast to another type based on the icmp predicate // being used. -void CWriter::writeOperandWithCast(Value* Operand, ICmpInst &Cmp) { +void CWriter::writeOperandWithCast(Value *Operand, ICmpInst &Cmp) { // This has to do a cast to ensure the operand has the right signedness. 
// Also, if the operand is a pointer, we make sure to cast to an integer when // doing the comparison both for signedness and so that the C compiler doesn't @@ -1534,7 +1664,7 @@ void CWriter::writeOperandWithCast(Value* Operand, ICmpInst &Cmp) { bool castIsSigned = Cmp.isSigned(); // If the operand was a pointer, convert to a large integer type. - Type* OpTy = Operand->getType(); + Type *OpTy = Operand->getType(); if (OpTy->isPointerTy()) OpTy = TD->getIntPtrType(Operand->getContext()); @@ -1548,61 +1678,64 @@ void CWriter::writeOperandWithCast(Value* Operand, ICmpInst &Cmp) { // generateCompilerSpecificCode - This is where we add conditional compilation // directives to cater to specific compilers as need be. // -static void generateCompilerSpecificCode(raw_ostream& Out, - const DataLayout *TD) { +static void generateCompilerSpecificCode(raw_ostream &Out, + const DataLayout *TD) { // Alloca is hard to get, and we don't want to include stdlib.h here. Out << "/* get a declaration for alloca */\n" - << "#if defined(__CYGWIN__) || defined(__MINGW32__)\n" - << "#define alloca(x) __builtin_alloca((x))\n" - << "#define _alloca(x) __builtin_alloca((x))\n" - << "#elif defined(__APPLE__)\n" - << "extern void *__builtin_alloca(unsigned long);\n" - << "#define alloca(x) __builtin_alloca(x)\n" - << "#define longjmp _longjmp\n" - << "#define setjmp _setjmp\n" - << "#elif defined(__sun__)\n" - << "#if defined(__sparcv9)\n" - << "extern void *__builtin_alloca(unsigned long);\n" - << "#else\n" - << "extern void *__builtin_alloca(unsigned int);\n" - << "#endif\n" - << "#define alloca(x) __builtin_alloca(x)\n" - << "#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)\n" - << "#define alloca(x) __builtin_alloca(x)\n" - << "#elif defined(_MSC_VER)\n" - << "#define alloca(x) _alloca(x)\n" - << "#else\n" - << "#include <alloca.h>\n" - << "#endif\n\n"; + << "#if defined(__CYGWIN__) || defined(__MINGW32__)\n" + << 
"#define alloca(x) __builtin_alloca((x))\n" + << "#define _alloca(x) __builtin_alloca((x))\n" + << "#elif defined(__APPLE__)\n" + << "extern void *__builtin_alloca(unsigned long);\n" + << "#define alloca(x) __builtin_alloca(x)\n" + << "#define longjmp _longjmp\n" + << "#define setjmp _setjmp\n" + << "#elif defined(__sun__)\n" + << "#if defined(__sparcv9)\n" + << "extern void *__builtin_alloca(unsigned long);\n" + << "#else\n" + << "extern void *__builtin_alloca(unsigned int);\n" + << "#endif\n" + << "#define alloca(x) __builtin_alloca(x)\n" + << "#elif defined(__FreeBSD__) || defined(__NetBSD__) || " + "defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)\n" + << "#define alloca(x) __builtin_alloca(x)\n" + << "#elif defined(_MSC_VER)\n" + << "#define alloca(x) _alloca(x)\n" + << "#else\n" + << "#include <alloca.h>\n" + << "#endif\n\n"; // On Mac OS X, "external weak" is spelled "__attribute__((weak_import))". Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n" - << "#define __EXTERNAL_WEAK__ __attribute__((weak_import))\n" - << "#elif defined(__GNUC__)\n" - << "#define __EXTERNAL_WEAK__ __attribute__((weak))\n" - << "#else\n" - << "#define __EXTERNAL_WEAK__\n" - << "#endif\n\n"; + << "#define __EXTERNAL_WEAK__ __attribute__((weak_import))\n" + << "#elif defined(__GNUC__)\n" + << "#define __EXTERNAL_WEAK__ __attribute__((weak))\n" + << "#else\n" + << "#define __EXTERNAL_WEAK__\n" + << "#endif\n\n"; // For now, turn off the weak linkage attribute on Mac OS X. (See above.) 
Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n" - << "#define __ATTRIBUTE_WEAK__\n" - << "#elif defined(__GNUC__)\n" - << "#define __ATTRIBUTE_WEAK__ __attribute__((weak))\n" - << "#else\n" - << "#define __ATTRIBUTE_WEAK__\n" - << "#endif\n\n"; + << "#define __ATTRIBUTE_WEAK__\n" + << "#elif defined(__GNUC__)\n" + << "#define __ATTRIBUTE_WEAK__ __attribute__((weak))\n" + << "#else\n" + << "#define __ATTRIBUTE_WEAK__\n" + << "#endif\n\n"; // Add hidden visibility support. FIXME: APPLE_CC? Out << "#if defined(__GNUC__)\n" - << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n" - << "#endif\n\n"; + << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n" + << "#endif\n\n"; // Define unaligned-load helper macro Out << "#ifdef _MSC_VER\n"; - Out << "#define __UNALIGNED_LOAD__(type, align, op) *((type __unaligned*)op)\n"; + Out << "#define __UNALIGNED_LOAD__(type, align, op) *((type " + "__unaligned*)op)\n"; Out << "#else\n"; - Out << "#define __UNALIGNED_LOAD__(type, align, op) ((struct { type data __attribute__((packed, aligned(align))); }*)op)->data\n"; + Out << "#define __UNALIGNED_LOAD__(type, align, op) ((struct { type data " + "__attribute__((packed, aligned(align))); }*)op)->data\n"; Out << "#endif\n\n"; // Define unaligned-load helper macro @@ -1653,110 +1786,144 @@ static void generateCompilerSpecificCode(raw_ostream& Out, // // Similar to __builtin_inf, except the return type is float. 
Out << "#ifdef __GNUC__\n" - << "#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */\n" - << "#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */\n" - //<< "#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */\n" - //<< "#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */\n" - << "#define LLVM_INF __builtin_inf() /* Double */\n" - << "#define LLVM_INFF __builtin_inff() /* Float */\n" - << "#define LLVM_PREFETCH(addr,rw,locality) " - "__builtin_prefetch(addr,rw,locality)\n" - << "#define __ATTRIBUTE_CTOR__ __attribute__((constructor))\n" - << "#define __ATTRIBUTE_DTOR__ __attribute__((destructor))\n" - << "#else\n" - << "#define LLVM_NAN(NanStr) ((double)NAN) /* Double */\n" - << "#define LLVM_NANF(NanStr) ((float)NAN)) /* Float */\n" - //<< "#define LLVM_NANS(NanStr) ((double)NAN) /* Double */\n" - //<< "#define LLVM_NANSF(NanStr) ((single)NAN) /* Float */\n" - << "#define LLVM_INF ((double)INFINITY) /* Double */\n" - << "#define LLVM_INFF ((float)INFINITY) /* Float */\n" - << "#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */\n" - << "#define __ATTRIBUTE_CTOR__ \"__attribute__((constructor)) not supported on this compiler\"\n" - << "#define __ATTRIBUTE_DTOR__ \"__attribute__((destructor)) not supported on this compiler\"\n" - << "#endif\n\n"; - - Out << "#if !defined(__GNUC__) || __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ \n" - << "#define __builtin_stack_save() 0 /* not implemented */\n" - << "#define __builtin_stack_restore(X) /* noop */\n" - << "#endif\n\n"; + << "#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */\n" + << "#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */\n" + //<< "#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */\n" + //<< "#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */\n" + << "#define LLVM_INF __builtin_inf() /* Double */\n" + << "#define LLVM_INFF __builtin_inff() /* Float */\n" + << "#define LLVM_PREFETCH(addr,rw,locality) " + 
"__builtin_prefetch(addr,rw,locality)\n" + << "#define __ATTRIBUTE_CTOR__ __attribute__((constructor))\n" + << "#define __ATTRIBUTE_DTOR__ __attribute__((destructor))\n" + << "#else\n" + << "#define LLVM_NAN(NanStr) ((double)NAN) /* Double */\n" + << "#define LLVM_NANF(NanStr) ((float)NAN)) /* Float */\n" + //<< "#define LLVM_NANS(NanStr) ((double)NAN) /* Double */\n" + //<< "#define LLVM_NANSF(NanStr) ((single)NAN) /* Float */\n" + << "#define LLVM_INF ((double)INFINITY) /* Double */\n" + << "#define LLVM_INFF ((float)INFINITY) /* Float */\n" + << "#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */\n" + << "#define __ATTRIBUTE_CTOR__ \"__attribute__((constructor)) not " + "supported on this compiler\"\n" + << "#define __ATTRIBUTE_DTOR__ \"__attribute__((destructor)) not " + "supported on this compiler\"\n" + << "#endif\n\n"; + + Out << "#if !defined(__GNUC__) || __GNUC__ < 4 /* Old GCC's, or compilers " + "not GCC */ \n" + << "#define __builtin_stack_save() 0 /* not implemented */\n" + << "#define __builtin_stack_restore(X) /* noop */\n" + << "#endif\n\n"; // Output typedefs for 128-bit integers - Out << "#if defined(__GNUC__) && defined(__LP64__) /* 128-bit integer types */\n" - << "typedef int __attribute__((mode(TI))) int128_t;\n" - << "typedef unsigned __attribute__((mode(TI))) uint128_t;\n" - << "#define UINT128_C(hi, lo) (((uint128_t)(hi) << 64) | (uint128_t)(lo))\n" - << "static __forceinline uint128_t llvm_ctor_u128(ulong hi, ulong lo) {" - << " return UINT128_C(hi, lo); }\n" - << "static __forceinline bool llvm_icmp_eq_u128(uint128_t l, uint128_t r) {" - << " return l == r; }\n" - << "static __forceinline bool llvm_icmp_ne_u128(uint128_t l, uint128_t r) {" - << " return l != r; }\n" - << "static __forceinline bool llvm_icmp_ule_u128(uint128_t l, uint128_t r) {" - << " return l <= r; }\n" - << "static __forceinline bool llvm_icmp_sle_i128(int128_t l, int128_t r) {" - << " return l <= r; }\n" - << "static __forceinline bool llvm_icmp_uge_u128(uint128_t 
l, uint128_t r) {" - << " return l >= r; }\n" - << "static __forceinline bool llvm_icmp_sge_i128(int128_t l, int128_t r) {" - << " return l >= r; }\n" - << "static __forceinline bool llvm_icmp_ult_u128(uint128_t l, uint128_t r) {" - << " return l < r; }\n" - << "static __forceinline bool llvm_icmp_slt_i128(int128_t l, int128_t r) {" - << " return l < r; }\n" - << "static __forceinline bool llvm_icmp_ugt_u128(uint128_t l, uint128_t r) {" - << " return l > r; }\n" - << "static __forceinline bool llvm_icmp_sgt_i128(int128_t l, int128_t r) {" - << " return l > r; }\n" - - << "#else /* manual 128-bit types */\n" - // TODO: field order should be reversed for big-endian - << "typedef struct { ulong lo; ulong hi; } uint128_t;\n" - << "typedef uint128_t int128_t;\n" - << "#define UINT128_C(hi, lo) {(lo), (hi)}\n" // only use in Static context - << "static __forceinline uint128_t llvm_ctor_u128(ulong hi, ulong lo) {" - << " uint128_t r; r.lo = lo; r.hi = hi; return r; }\n" - << "static __forceinline bool llvm_icmp_eq_u128(uint128_t l, uint128_t r) {" - << " return l.hi == r.hi && l.lo == r.lo; }\n" - << "static __forceinline bool llvm_icmp_ne_u128(uint128_t l, uint128_t r) {" - << " return l.hi != r.hi || l.lo != r.lo; }\n" - << "static __forceinline bool llvm_icmp_ule_u128(uint128_t l, uint128_t r) {" - << " return l.hi < r.hi ? 1 : (l.hi == r.hi ? l.lo <= l.lo : 0); }\n" - << "static __forceinline bool llvm_icmp_sle_i128(int128_t l, int128_t r) {" - << " return (long)l.hi < (long)r.hi ? 1 : (l.hi == r.hi ? (long)l.lo <= (long)l.lo : 0); }\n" - << "static __forceinline bool llvm_icmp_uge_u128(uint128_t l, uint128_t r) {" - << " return l.hi > r.hi ? 1 : (l.hi == r.hi ? l.lo >= l.hi : 0); }\n" - << "static __forceinline bool llvm_icmp_sge_i128(int128_t l, int128_t r) {" - << " return (long)l.hi > (long)r.hi ? 1 : (l.hi == r.hi ? (long)l.lo >= (long)l.lo : 0); }\n" - << "static __forceinline bool llvm_icmp_ult_u128(uint128_t l, uint128_t r) {" - << " return l.hi < r.hi ? 
1 : (l.hi == r.hi ? l.lo < l.hi : 0); }\n" - << "static __forceinline bool llvm_icmp_slt_i128(int128_t l, int128_t r) {" - << " return (long)l.hi < (long)r.hi ? 1 : (l.hi == r.hi ? (long)l.lo < (long)l.lo : 0); }\n" - << "static __forceinline bool llvm_icmp_ugt_u128(uint128_t l, uint128_t r) {" - << " return l.hi > r.hi ? 1 : (l.hi == r.hi ? l.lo > l.hi : 0); }\n" - << "static __forceinline bool llvm_icmp_sgt_i128(int128_t l, int128_t r) {" - << " return (long)l.hi > (long)r.hi ? 1 : (l.hi == r.hi ? (long)l.lo > (long)l.lo : 0); }\n" - << "#define __emulate_i128\n" - << "#endif\n\n"; + Out << "#if defined(__GNUC__) && defined(__LP64__) /* 128-bit integer types " + "*/\n" + << "typedef int __attribute__((mode(TI))) int128_t;\n" + << "typedef unsigned __attribute__((mode(TI))) uint128_t;\n" + << "#define UINT128_C(hi, lo) (((uint128_t)(hi) << 64) | " + "(uint128_t)(lo))\n" + << "static __forceinline uint128_t llvm_ctor_u128(ulong hi, ulong lo) {" + << " return UINT128_C(hi, lo); }\n" + << "static __forceinline bool llvm_icmp_eq_u128(uint128_t l, uint128_t " + "r) {" + << " return l == r; }\n" + << "static __forceinline bool llvm_icmp_ne_u128(uint128_t l, uint128_t " + "r) {" + << " return l != r; }\n" + << "static __forceinline bool llvm_icmp_ule_u128(uint128_t l, uint128_t " + "r) {" + << " return l <= r; }\n" + << "static __forceinline bool llvm_icmp_sle_i128(int128_t l, int128_t r) " + "{" + << " return l <= r; }\n" + << "static __forceinline bool llvm_icmp_uge_u128(uint128_t l, uint128_t " + "r) {" + << " return l >= r; }\n" + << "static __forceinline bool llvm_icmp_sge_i128(int128_t l, int128_t r) " + "{" + << " return l >= r; }\n" + << "static __forceinline bool llvm_icmp_ult_u128(uint128_t l, uint128_t " + "r) {" + << " return l < r; }\n" + << "static __forceinline bool llvm_icmp_slt_i128(int128_t l, int128_t r) " + "{" + << " return l < r; }\n" + << "static __forceinline bool llvm_icmp_ugt_u128(uint128_t l, uint128_t " + "r) {" + << " return l > r; }\n" + << 
"static __forceinline bool llvm_icmp_sgt_i128(int128_t l, int128_t r) " + "{" + << " return l > r; }\n" + + << "#else /* manual 128-bit types */\n" + // TODO: field order should be reversed for big-endian + << "typedef struct { ulong lo; ulong hi; } uint128_t;\n" + << "typedef uint128_t int128_t;\n" + << "#define UINT128_C(hi, lo) {(lo), (hi)}\n" // only use in Static + // context + << "static __forceinline uint128_t llvm_ctor_u128(ulong hi, ulong lo) {" + << " uint128_t r; r.lo = lo; r.hi = hi; return r; }\n" + << "static __forceinline bool llvm_icmp_eq_u128(uint128_t l, uint128_t " + "r) {" + << " return l.hi == r.hi && l.lo == r.lo; }\n" + << "static __forceinline bool llvm_icmp_ne_u128(uint128_t l, uint128_t " + "r) {" + << " return l.hi != r.hi || l.lo != r.lo; }\n" + << "static __forceinline bool llvm_icmp_ule_u128(uint128_t l, uint128_t " + "r) {" + << " return l.hi < r.hi ? 1 : (l.hi == r.hi ? l.lo <= l.lo : 0); }\n" + << "static __forceinline bool llvm_icmp_sle_i128(int128_t l, int128_t r) " + "{" + << " return (long)l.hi < (long)r.hi ? 1 : (l.hi == r.hi ? (long)l.lo <= " + "(long)l.lo : 0); }\n" + << "static __forceinline bool llvm_icmp_uge_u128(uint128_t l, uint128_t " + "r) {" + << " return l.hi > r.hi ? 1 : (l.hi == r.hi ? l.lo >= l.hi : 0); }\n" + << "static __forceinline bool llvm_icmp_sge_i128(int128_t l, int128_t r) " + "{" + << " return (long)l.hi > (long)r.hi ? 1 : (l.hi == r.hi ? (long)l.lo >= " + "(long)l.lo : 0); }\n" + << "static __forceinline bool llvm_icmp_ult_u128(uint128_t l, uint128_t " + "r) {" + << " return l.hi < r.hi ? 1 : (l.hi == r.hi ? l.lo < l.hi : 0); }\n" + << "static __forceinline bool llvm_icmp_slt_i128(int128_t l, int128_t r) " + "{" + << " return (long)l.hi < (long)r.hi ? 1 : (l.hi == r.hi ? (long)l.lo < " + "(long)l.lo : 0); }\n" + << "static __forceinline bool llvm_icmp_ugt_u128(uint128_t l, uint128_t " + "r) {" + << " return l.hi > r.hi ? 1 : (l.hi == r.hi ? 
l.lo > l.hi : 0); }\n" + << "static __forceinline bool llvm_icmp_sgt_i128(int128_t l, int128_t r) " + "{" + << " return (long)l.hi > (long)r.hi ? 1 : (l.hi == r.hi ? (long)l.lo > " + "(long)l.lo : 0); }\n" + << "#define __emulate_i128\n" + << "#endif\n\n"; // We output GCC specific attributes to preserve 'linkonce'ness on globals. // If we aren't being compiled with GCC, just drop these attributes. Out << "#ifdef _MSC_VER /* Can only support \"linkonce\" vars with GCC */\n" - << "#define __attribute__(X)\n" - << "#endif\n\n"; + << "#define __attribute__(X)\n" + << "#endif\n\n"; } /// FindStaticTors - Given a static ctor/dtor list, unpack its contents into /// the StaticTors set. -static void FindStaticTors(GlobalVariable *GV, std::set<Function*> &StaticTors){ +static void FindStaticTors(GlobalVariable *GV, + std::set<Function *> &StaticTors) { ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); - if (!InitList) return; + if (!InitList) + return; for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) - if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){ - if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. + if (ConstantStruct *CS = + dyn_cast<ConstantStruct>(InitList->getOperand(i))) { + if (CS->getNumOperands() != 2) + return; // Not array of 2-element structs. if (CS->getOperand(1)->isNullValue()) - return; // Found a null terminator, exit printing. + return; // Found a null terminator, exit printing. Constant *FP = CS->getOperand(1); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP)) if (CE->isCast()) @@ -1768,7 +1935,8 @@ static void FindStaticTors(GlobalVariable *GV, std::set<Function*> &StaticTors){ enum SpecialGlobalClass { NotSpecial = 0, - GlobalCtors, GlobalDtors, + GlobalCtors, + GlobalDtors, NotPrinted }; @@ -1785,8 +1953,7 @@ static SpecialGlobalClass getGlobalVariableClass(GlobalVariable *GV) { // Otherwise, if it is other metadata, don't print it. 
This catches things // like debug information. - if (StringRef(GV->getSection()) == "llvm.metadata") - { + if (StringRef(GV->getSection()) == "llvm.metadata") { DEBUG(errs() << "Printing Metada!\n" << *GV << "\n"); return NotPrinted; } @@ -1796,7 +1963,7 @@ static SpecialGlobalClass getGlobalVariableClass(GlobalVariable *GV) { // PrintEscapedString - Print each character of the specified string, escaping // it if it is not printable or if it is an escape char. static void PrintEscapedString(const char *Str, unsigned Length, - raw_ostream &Out) { + raw_ostream &Out) { for (unsigned i = 0; i != Length; ++i) { unsigned char C = Str[i]; if (isprint(C) && C != '\\' && C != '"') @@ -1823,9 +1990,10 @@ bool CWriter::doInitialization(Module &M) { TD = new DataLayout(&M); IL = new IntrinsicLowering(*TD); - // CHECK: Looking at lib/CodeGen/IntrinsicsLowering.cpp this func not supported - // This func creates defs which are created once each call is referenced anyway - //IL->AddPrototypes(M); + // CHECK: Looking at lib/CodeGen/IntrinsicsLowering.cpp this func not + // supported This func creates defs which are created once each call is + // referenced anyway + // IL->AddPrototypes(M); #if 0 std::string Triple = TheModule->getTargetTriple(); @@ -1837,7 +2005,7 @@ bool CWriter::doInitialization(Module &M) { TAsm = Match->createMCAsmInfo(Triple); #endif TAsm = new CBEMCAsmInfo(); - MRI = new MCRegisterInfo(); + MRI = new MCRegisterInfo(); TCtx = new MCContext(TAsm, MRI, NULL); return false; } @@ -1883,17 +2051,18 @@ bool CWriter::doFinalization(Module &M) { void CWriter::generateHeader(Module &M) { // Keep track of which functions are static ctors/dtors so they can have // an attribute added to their prototypes. 
- std::set<Function*> StaticCtors, StaticDtors; - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { + std::set<Function *> StaticCtors, StaticDtors; + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; + ++I) { switch (getGlobalVariableClass(&*I)) { - default: break; - case GlobalCtors: - FindStaticTors(&*I, StaticCtors); - break; - case GlobalDtors: - FindStaticTors(&*I, StaticDtors); - break; + default: + break; + case GlobalCtors: + FindStaticTors(&*I, StaticCtors); + break; + case GlobalDtors: + FindStaticTors(&*I, StaticDtors); + break; } } @@ -1903,8 +2072,9 @@ void CWriter::generateHeader(Module &M) { // Out << "#include <setjmp.h>\n"; // Unwind support // Out << "#include <limits.h>\n"; // With overflow intrinsics support. // Out << "#include <stdint.h>\n"; // Sized integer support - // Out << "#include <math.h>\n"; // definitions for some math functions and numeric constants - // Out << "#include <APInt-C.h>\n"; // Implementations of many llvm intrinsics + // Out << "#include <math.h>\n"; // definitions for some math + // functions and numeric constants Out << "#include <APInt-C.h>\n"; // + // Implementations of many llvm intrinsics // // Provide a definition for `bool' if not compiling with a C++ compiler. // Out << "#ifndef __cplusplus\ntypedef unsigned char bool;\n#endif\n"; // Out << "\n"; @@ -1912,24 +2082,24 @@ void CWriter::generateHeader(Module &M) { // generateCompilerSpecificCode(Out, TD); Out << "\n\n/* Support for floating point constants */\n" - << "typedef ulong ConstantDoubleTy;\n" - << "typedef uint ConstantFloatTy;\n" - << "typedef struct { ulong f1; ushort f2; " - "ushort pad[3]; } ConstantFP80Ty;\n" - // This is used for both kinds of 128-bit long double; meaning differs. 
- << "typedef struct { ulong f1; ulong f2; }" - " ConstantFP128Ty;\n" - << "\n\n/* OpenCL Pragmas */\n" - << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" - << "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n" - << "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n" - << "\n\n/* Global Declarations */\n"; + << "typedef ulong ConstantDoubleTy;\n" + << "typedef uint ConstantFloatTy;\n" + << "typedef struct { ulong f1; ushort f2; " + "ushort pad[3]; } ConstantFP80Ty;\n" + // This is used for both kinds of 128-bit long double; meaning differs. + << "typedef struct { ulong f1; ulong f2; }" + " ConstantFP128Ty;\n" + << "\n\n/* OpenCL Pragmas */\n" + << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + << "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n" + << "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n" + << "\n\n/* Global Declarations */\n"; // First output all the declarations for the program, because C requires // Functions & globals to be declared before they are used. if (!M.getModuleInlineAsm().empty()) { Out << "\n/* Module asm statements */\n" - << "__asm__ ("; + << "__asm__ ("; // Split the string into lines, to make it easier to read the .ll file. std::string Asm = M.getModuleInlineAsm(); @@ -1939,22 +2109,22 @@ void CWriter::generateHeader(Module &M) { // We found a newline, print the portion of the asm string from the // last newline up to this newline. 
Out << "\""; - PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine), - Out); + PrintEscapedString( + std::string(Asm.begin() + CurPos, Asm.begin() + NewLine), Out); Out << "\\n\"\n"; - CurPos = NewLine+1; + CurPos = NewLine + 1; NewLine = Asm.find_first_of('\n', CurPos); } Out << "\""; - PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.end()), Out); + PrintEscapedString(std::string(Asm.begin() + CurPos, Asm.end()), Out); Out << "\");\n" - << "/* End Module asm statements */\n"; + << "/* End Module asm statements */\n"; } // collect any remaining types raw_null_ostream NullOut; - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; + ++I) { // Ignore special globals, such as debug info. if (getGlobalVariableClass(&*I)) continue; @@ -1966,8 +2136,9 @@ void CWriter::generateHeader(Module &M) { if (!M.global_empty()) { Out << "\n/* External Global Variable Declarations */\n"; for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - if (!I->isDeclaration() || isEmptyType(I->getType()->getPointerElementType())) + I != E; ++I) { + if (!I->isDeclaration() || + isEmptyType(I->getType()->getPointerElementType())) continue; if (I->hasDLLImportStorageClass()) @@ -1987,8 +2158,8 @@ void CWriter::generateHeader(Module &M) { Type *ElTy = I->getType()->getElementType(); unsigned Alignment = I->getAlignment(); - bool IsOveraligned = Alignment && - Alignment > TD->getABITypeAlignment(ElTy); + bool IsOveraligned = + Alignment && Alignment > TD->getABITypeAlignment(ElTy); // if (IsOveraligned) // Out << "__MSALIGN__(" << Alignment << ") "; printTypeName(Out, ElTy, false) << ' ' << GetValueName(&*I); @@ -2005,64 +2176,53 @@ void CWriter::generateHeader(Module &M) { Out << "\n/* Function Declarations */\n"; // Store the intrinsics which will be declared/defined below. 
- SmallVector<Function*, 16> intrinsicsToDefine; + SmallVector<Function *, 16> intrinsicsToDefine; for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { // Don't print declarations for intrinsic functions. // Store the used intrinsics, which need to be explicitly defined. if (I->isIntrinsic()) { switch (I->getIntrinsicID()) { - default: - continue; - case Intrinsic::uadd_with_overflow: - case Intrinsic::sadd_with_overflow: - case Intrinsic::usub_with_overflow: - case Intrinsic::ssub_with_overflow: - case Intrinsic::umul_with_overflow: - case Intrinsic::smul_with_overflow: - case Intrinsic::bswap: - case Intrinsic::ceil: - case Intrinsic::ctlz: - case Intrinsic::ctpop: - case Intrinsic::cttz: - case Intrinsic::fabs: - case Intrinsic::floor: - case Intrinsic::fma: - case Intrinsic::fmuladd: - case Intrinsic::pow: - case Intrinsic::powi: - case Intrinsic::rint: - case Intrinsic::sqrt: - case Intrinsic::trunc: - intrinsicsToDefine.push_back(&*I); - continue; + default: + continue; + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::bswap: + case Intrinsic::ceil: + case Intrinsic::ctlz: + case Intrinsic::ctpop: + case Intrinsic::cttz: + case Intrinsic::fabs: + case Intrinsic::floor: + case Intrinsic::fma: + case Intrinsic::fmuladd: + case Intrinsic::pow: + case Intrinsic::powi: + case Intrinsic::rint: + case Intrinsic::sqrt: + case Intrinsic::trunc: + intrinsicsToDefine.push_back(&*I); + continue; } } // Skip a few functions that have already been defined in headers - if (I->getName() == "setjmp" || - I->getName() == "longjmp" || - I->getName() == "_setjmp" || - I->getName() == "siglongjmp" || - I->getName() == "sigsetjmp" || - I->getName() == "pow" || - I->getName() == "powf" || - I->getName() == "sqrt" || - I->getName() == "sqrtf" || - I->getName() == "trunc" || - 
I->getName() == "truncf" || - I->getName() == "rint" || - I->getName() == "rintf" || - I->getName() == "floor" || - I->getName() == "floorf" || - I->getName() == "ceil" || - I->getName() == "ceilf" || - I->getName() == "alloca" || - I->getName() == "_alloca" || - I->getName() == "_chkstk" || - I->getName() == "__chkstk" || - I->getName() == "___chkstk_ms") - continue; + if (I->getName() == "setjmp" || I->getName() == "longjmp" || + I->getName() == "_setjmp" || I->getName() == "siglongjmp" || + I->getName() == "sigsetjmp" || I->getName() == "pow" || + I->getName() == "powf" || I->getName() == "sqrt" || + I->getName() == "sqrtf" || I->getName() == "trunc" || + I->getName() == "truncf" || I->getName() == "rint" || + I->getName() == "rintf" || I->getName() == "floor" || + I->getName() == "floorf" || I->getName() == "ceil" || + I->getName() == "ceilf" || I->getName() == "alloca" || + I->getName() == "_alloca" || I->getName() == "_chkstk" || + I->getName() == "__chkstk" || I->getName() == "___chkstk_ms") + continue; if (I->hasDLLImportStorageClass()) Out << "__declspec(dllimport) "; @@ -2095,7 +2255,7 @@ void CWriter::generateHeader(Module &M) { if (!M.global_empty()) { Out << "\n\n/* Global Variable Definitions and Initialization */\n"; for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { + I != E; ++I) { declareOneGlobalVariable(&*I); } } @@ -2103,9 +2263,10 @@ void CWriter::generateHeader(Module &M) { // Alias declarations... 
if (!M.alias_empty()) { Out << "\n/* External Alias Declarations */\n"; - for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); - I != E; ++I) { - assert(!I->isDeclaration() && !isEmptyType(I->getType()->getPointerElementType())); + for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; + ++I) { + assert(!I->isDeclaration() && + !isEmptyType(I->getType()->getPointerElementType())); if (I->hasLocalLinkage()) continue; // Internal Global @@ -2120,8 +2281,8 @@ void CWriter::generateHeader(Module &M) { Type *ElTy = I->getType()->getElementType(); unsigned Alignment = I->getAlignment(); - bool IsOveraligned = Alignment && - Alignment > TD->getABITypeAlignment(ElTy); + bool IsOveraligned = + Alignment && Alignment > TD->getABITypeAlignment(ElTy); // if (IsOveraligned) // Out << "__MSALIGN__(" << Alignment << ") "; // GetValueName would resolve the alias, which is not what we want, @@ -2176,9 +2337,11 @@ void CWriter::generateHeader(Module &M) { Out << "return 1; }\n"; // Loop over all select operations - for (std::set<Type*>::iterator it = SelectDeclTypes.begin(), end = SelectDeclTypes.end(); - it != end; ++it) { - // static __forceinline Rty llvm_select_u8x4(<bool x 4> condition, <u8 x 4> iftrue, <u8 x 4> ifnot) { + for (std::set<Type *>::iterator it = SelectDeclTypes.begin(), + end = SelectDeclTypes.end(); + it != end; ++it) { + // static __forceinline Rty llvm_select_u8x4(<bool x 4> condition, <u8 x 4> + // iftrue, <u8 x 4> ifnot) { // Rty r = { // condition[0] ? iftrue[0] : ifnot[0], // condition[1] ? 
iftrue[1] : ifnot[1], @@ -2193,7 +2356,11 @@ void CWriter::generateHeader(Module &M) { printTypeString(Out, *it, false); Out << "("; if (isa<VectorType>(*it)) - printTypeNameUnaligned(Out, VectorType::get(Type::getInt1Ty((*it)->getContext()), (*it)->getVectorNumElements()), false); + printTypeNameUnaligned( + Out, + VectorType::get(Type::getInt1Ty((*it)->getContext()), + (*it)->getVectorNumElements()), + false); else Out << "bool"; Out << " condition, "; @@ -2206,19 +2373,22 @@ void CWriter::generateHeader(Module &M) { if (isa<VectorType>(*it)) { unsigned n, l = (*it)->getVectorNumElements(); for (n = 0; n < l; n++) { - Out << " r.vector[" << n << "] = condition.vector[" << n << "] ? iftrue.vector[" << n << "] : ifnot.vector[" << n << "];\n"; + Out << " r.vector[" << n << "] = condition.vector[" << n + << "] ? iftrue.vector[" << n << "] : ifnot.vector[" << n << "];\n"; } - } - else { + } else { Out << " r = condition ? iftrue : ifnot;\n"; } Out << " return r;\n}\n"; } // Loop over all compare operations - for (std::set< std::pair<CmpInst::Predicate, VectorType*> >::iterator it = CmpDeclTypes.begin(), end = CmpDeclTypes.end(); - it != end; ++it) { - // static __forceinline <bool x 4> llvm_icmp_ge_u8x4(<u8 x 4> l, <u8 x 4> r) { + for (std::set<std::pair<CmpInst::Predicate, VectorType *>>::iterator + it = CmpDeclTypes.begin(), + end = CmpDeclTypes.end(); + it != end; ++it) { + // static __forceinline <bool x 4> llvm_icmp_ge_u8x4(<u8 x 4> l, <u8 x 4> r) + // { // Rty c = { // l[0] >= r[0], // l[1] >= r[1], @@ -2228,7 +2398,8 @@ void CWriter::generateHeader(Module &M) { // return c; // } unsigned n, l = (*it).second->getVectorNumElements(); - VectorType *RTy = VectorType::get(Type::getInt1Ty((*it).second->getContext()), l); + VectorType *RTy = + VectorType::get(Type::getInt1Ty((*it).second->getContext()), l); bool isSigned = CmpInst::isSigned((*it).first); Out << "static __forceinline "; printTypeName(Out, RTy, isSigned); @@ -2248,25 +2419,38 @@ void 
CWriter::generateHeader(Module &M) { for (n = 0; n < l; n++) { Out << " c.vector[" << n << "] = "; if (CmpInst::isFPPredicate((*it).first)) { - Out << "llvm_fcmp_ " << getCmpPredicateName((*it).first) << "(l.vector[" << n << "], r.vector[" << n << "]);\n"; + Out << "llvm_fcmp_ " << getCmpPredicateName((*it).first) << "(l.vector[" + << n << "], r.vector[" << n << "]);\n"; } else { Out << "l.vector[" << n << "]"; switch ((*it).first) { - case CmpInst::ICMP_EQ: Out << " == "; break; - case CmpInst::ICMP_NE: Out << " != "; break; - case CmpInst::ICMP_ULE: - case CmpInst::ICMP_SLE: Out << " <= "; break; - case CmpInst::ICMP_UGE: - case CmpInst::ICMP_SGE: Out << " >= "; break; - case CmpInst::ICMP_ULT: - case CmpInst::ICMP_SLT: Out << " < "; break; - case CmpInst::ICMP_UGT: - case CmpInst::ICMP_SGT: Out << " > "; break; - default: + case CmpInst::ICMP_EQ: + Out << " == "; + break; + case CmpInst::ICMP_NE: + Out << " != "; + break; + case CmpInst::ICMP_ULE: + case CmpInst::ICMP_SLE: + Out << " <= "; + break; + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_SGE: + Out << " >= "; + break; + case CmpInst::ICMP_ULT: + case CmpInst::ICMP_SLT: + Out << " < "; + break; + case CmpInst::ICMP_UGT: + case CmpInst::ICMP_SGT: + Out << " > "; + break; + default: #ifndef NDEBUG - errs() << "Invalid icmp predicate!" << (*it).first; + errs() << "Invalid icmp predicate!" 
<< (*it).first; #endif - llvm_unreachable(0); + llvm_unreachable(0); } Out << "r.vector[" << n << "];\n"; } @@ -2275,9 +2459,13 @@ void CWriter::generateHeader(Module &M) { } // Loop over all (vector) cast operations - for (std::set<std::pair<CastInst::CastOps, std::pair<Type*, Type*>>>::iterator it = CastOpDeclTypes.begin(), end = CastOpDeclTypes.end(); - it != end; ++it) { - // static __forceinline <u32 x 4> llvm_ZExt_u8x4_u32x4(<u8 x 4> in) { // Src->isVector == Dst->isVector + for (std::set< + std::pair<CastInst::CastOps, std::pair<Type *, Type *>>>::iterator + it = CastOpDeclTypes.begin(), + end = CastOpDeclTypes.end(); + it != end; ++it) { + // static __forceinline <u32 x 4> llvm_ZExt_u8x4_u32x4(<u8 x 4> in) { // + // Src->isVector == Dst->isVector // Rty out = { // in[0], // in[1], @@ -2286,7 +2474,8 @@ void CWriter::generateHeader(Module &M) { // }; // return out; // } - // static __forceinline u32 llvm_BitCast_u8x4_u32(<u8 x 4> in) { // Src->bitsSize == Dst->bitsSize + // static __forceinline u32 llvm_BitCast_u8x4_u32(<u8 x 4> in) { // + // Src->bitsSize == Dst->bitsSize // union { // <u8 x 4> in; // u32 out; @@ -2299,18 +2488,18 @@ void CWriter::generateHeader(Module &M) { Type *DstTy = (*it).second.second; bool SrcSigned, DstSigned; switch (opcode) { - default: - SrcSigned = false; - DstSigned = false; - case Instruction::SIToFP: - SrcSigned = true; - DstSigned = false; - case Instruction::FPToSI: - SrcSigned = false; - DstSigned = true; - case Instruction::SExt: - SrcSigned = true; - DstSigned = true; + default: + SrcSigned = false; + DstSigned = false; + case Instruction::SIToFP: + SrcSigned = true; + DstSigned = false; + case Instruction::FPToSI: + SrcSigned = false; + DstSigned = true; + case Instruction::SExt: + SrcSigned = true; + DstSigned = true; } Out << "static __forceinline "; @@ -2349,20 +2538,34 @@ void CWriter::generateHeader(Module &M) { Out << " out;\n"; Out << " LLVM"; switch (opcode) { - case Instruction::UIToFP: Out << "UItoFP"; break; 
- case Instruction::SIToFP: Out << "SItoFP"; break; - case Instruction::Trunc: Out << "Trunc"; break; - //case Instruction::FPExt: - //case Instruction::FPTrunc: - case Instruction::ZExt: Out << "ZExt"; break; - case Instruction::FPToUI: Out << "FPtoUI"; break; - case Instruction::SExt: Out << "SExt"; break; - case Instruction::FPToSI: Out << "FPtoSI"; break; - default: - llvm_unreachable("Invalid cast opcode for i128"); + case Instruction::UIToFP: + Out << "UItoFP"; + break; + case Instruction::SIToFP: + Out << "SItoFP"; + break; + case Instruction::Trunc: + Out << "Trunc"; + break; + // case Instruction::FPExt: + // case Instruction::FPTrunc: + case Instruction::ZExt: + Out << "ZExt"; + break; + case Instruction::FPToUI: + Out << "FPtoUI"; + break; + case Instruction::SExt: + Out << "SExt"; + break; + case Instruction::FPToSI: + Out << "FPtoSI"; + break; + default: + llvm_unreachable("Invalid cast opcode for i128"); } Out << "(" << SrcTy->getPrimitiveSizeInBits() << ", &in, " - << DstTy->getPrimitiveSizeInBits() << ", &out);\n"; + << DstTy->getPrimitiveSizeInBits() << ", &out);\n"; Out << " return out;\n"; Out << "#endif\n"; Out << "}\n"; @@ -2370,9 +2573,12 @@ void CWriter::generateHeader(Module &M) { } // Loop over all simple vector operations - for (std::set<std::pair<unsigned, Type*>>::iterator it = InlineOpDeclTypes.begin(), end = InlineOpDeclTypes.end(); - it != end; ++it) { - // static __forceinline <u32 x 4> llvm_BinOp_u32x4(<u32 x 4> a, <u32 x 4> b) { + for (std::set<std::pair<unsigned, Type *>>::iterator + it = InlineOpDeclTypes.begin(), + end = InlineOpDeclTypes.end(); + it != end; ++it) { + // static __forceinline <u32 x 4> llvm_BinOp_u32x4(<u32 x 4> a, <u32 x 4> b) + // { // Rty r = { // a[0] OP b[0], // a[1] OP b[1], @@ -2416,7 +2622,7 @@ void CWriter::generateHeader(Module &M) { // C can't handle non-power-of-two integer types unsigned mask = 0; if (ElemTy->isIntegerTy()) { - IntegerType *ITy = static_cast<IntegerType*>(ElemTy); + IntegerType *ITy 
= static_cast<IntegerType *>(ElemTy); if (!ITy->isPowerOf2ByteWidth()) mask = ITy->getBitMask(); } @@ -2438,34 +2644,54 @@ void CWriter::generateHeader(Module &M) { Out << "fmodf(a.vector[" << n << "], b.vector[" << n << "])"; else if (ElemTy->isDoubleTy()) Out << "fmod(a.vector[" << n << "], b.vector[" << n << "])"; - else // all 3 flavors of long double + else // all 3 flavors of long double Out << "fmodl(a.vector[" << n << "], b.vector[" << n << "])"; } else { Out << "a.vector[" << n << "]"; switch (opcode) { - case Instruction::Add: - case Instruction::FAdd: Out << " + "; break; - case Instruction::Sub: - case Instruction::FSub: Out << " - "; break; - case Instruction::Mul: - case Instruction::FMul: Out << " * "; break; - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: Out << " % "; break; - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: Out << " / "; break; - case Instruction::And: Out << " & "; break; - case Instruction::Or: Out << " | "; break; - case Instruction::Xor: Out << " ^ "; break; - case Instruction::Shl : Out << " << "; break; - case Instruction::LShr: - case Instruction::AShr: Out << " >> "; break; - default: + case Instruction::Add: + case Instruction::FAdd: + Out << " + "; + break; + case Instruction::Sub: + case Instruction::FSub: + Out << " - "; + break; + case Instruction::Mul: + case Instruction::FMul: + Out << " * "; + break; + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + Out << " % "; + break; + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + Out << " / "; + break; + case Instruction::And: + Out << " & "; + break; + case Instruction::Or: + Out << " | "; + break; + case Instruction::Xor: + Out << " ^ "; + break; + case Instruction::Shl: + Out << " << "; + break; + case Instruction::LShr: + case Instruction::AShr: + Out << " >> "; + break; + default: #ifndef NDEBUG - errs() << "Invalid operator type!" 
<< opcode; + errs() << "Invalid operator type!" << opcode; #endif - llvm_unreachable(0); + llvm_unreachable(0); } Out << "b.vector[" << n << "]"; } @@ -2486,24 +2712,44 @@ void CWriter::generateHeader(Module &M) { } else { Out << "a"; switch (opcode) { - case Instruction::Add: Out << " + "; break; - case Instruction::Sub: Out << " - "; break; - case Instruction::Mul: Out << " * "; break; - case Instruction::URem: - case Instruction::SRem: Out << " % "; break; - case Instruction::UDiv: - case Instruction::SDiv: Out << " / "; break; - case Instruction::And: Out << " & "; break; - case Instruction::Or: Out << " | "; break; - case Instruction::Xor: Out << " ^ "; break; - case Instruction::Shl: Out << " << "; break; - case Instruction::LShr: - case Instruction::AShr: Out << " >> "; break; - default: + case Instruction::Add: + Out << " + "; + break; + case Instruction::Sub: + Out << " - "; + break; + case Instruction::Mul: + Out << " * "; + break; + case Instruction::URem: + case Instruction::SRem: + Out << " % "; + break; + case Instruction::UDiv: + case Instruction::SDiv: + Out << " / "; + break; + case Instruction::And: + Out << " & "; + break; + case Instruction::Or: + Out << " | "; + break; + case Instruction::Xor: + Out << " ^ "; + break; + case Instruction::Shl: + Out << " << "; + break; + case Instruction::LShr: + case Instruction::AShr: + Out << " >> "; + break; + default: #ifndef NDEBUG - errs() << "Invalid operator type!" << opcode; + errs() << "Invalid operator type!" 
<< opcode; #endif - llvm_unreachable(0); + llvm_unreachable(0); } Out << "b;\n"; } @@ -2525,7 +2771,8 @@ void CWriter::generateHeader(Module &M) { } else if (opcode == Instruction::Xor) { Out << " r.hi = a.hi ^ b.hi;\n"; Out << " r.lo = a.lo ^ b.lo;\n"; - } else if (opcode == Instruction::Shl) { // reminder: undef behavior if b >= 128 + } else if (opcode == + Instruction::Shl) { // reminder: undef behavior if b >= 128 Out << " if (b.lo >= 64) {\n"; Out << " r.hi = (a.lo << (b.lo - 64));\n"; Out << " r.lo = 0;\n"; @@ -2540,26 +2787,44 @@ void CWriter::generateHeader(Module &M) { // everything that hasn't been manually implemented above Out << " LLVM"; switch (opcode) { - //case BinaryNeg: Out << "Neg"; break; - //case BinaryNot: Out << "FlipAllBits"; break; - case Instruction::Add: Out << "Add"; break; - case Instruction::Sub: Out << "Sub"; break; - case Instruction::Mul: Out << "Mul"; break; - case Instruction::URem: Out << "URem"; break; - case Instruction::SRem: Out << "SRem"; break; - case Instruction::UDiv: Out << "UDiv"; break; - case Instruction::SDiv: Out << "SDiv"; break; - //case Instruction::And: Out << "And"; break; - //case Instruction::Or: Out << "Or"; break; - //case Instruction::Xor: Out << "Xor"; break; - //case Instruction::Shl: Out << "Shl"; break; - case Instruction::LShr: Out << "LShr"; break; - case Instruction::AShr: Out << "AShr"; break; - default: + // case BinaryNeg: Out << "Neg"; break; + // case BinaryNot: Out << "FlipAllBits"; break; + case Instruction::Add: + Out << "Add"; + break; + case Instruction::Sub: + Out << "Sub"; + break; + case Instruction::Mul: + Out << "Mul"; + break; + case Instruction::URem: + Out << "URem"; + break; + case Instruction::SRem: + Out << "SRem"; + break; + case Instruction::UDiv: + Out << "UDiv"; + break; + case Instruction::SDiv: + Out << "SDiv"; + break; + // case Instruction::And: Out << "And"; break; + // case Instruction::Or: Out << "Or"; break; + // case Instruction::Xor: Out << "Xor"; break; + // case 
Instruction::Shl: Out << "Shl"; break; + case Instruction::LShr: + Out << "LShr"; + break; + case Instruction::AShr: + Out << "AShr"; + break; + default: #ifndef NDEBUG - errs() << "Invalid operator type!" << opcode; + errs() << "Invalid operator type!" << opcode; #endif - llvm_unreachable(0); + llvm_unreachable(0); } Out << "(16, &a, &b, &r);\n"; } @@ -2579,34 +2844,54 @@ void CWriter::generateHeader(Module &M) { Out << "fmodf(a, b)"; else if (ElemTy->isDoubleTy()) Out << "fmod(a, b)"; - else // all 3 flavors of long double + else // all 3 flavors of long double Out << "fmodl(a, b)"; } else { Out << "a"; switch (opcode) { - case Instruction::Add: - case Instruction::FAdd: Out << " + "; break; - case Instruction::Sub: - case Instruction::FSub: Out << " - "; break; - case Instruction::Mul: - case Instruction::FMul: Out << " * "; break; - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: Out << " % "; break; - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: Out << " / "; break; - case Instruction::And: Out << " & "; break; - case Instruction::Or: Out << " | "; break; - case Instruction::Xor: Out << " ^ "; break; - case Instruction::Shl : Out << " << "; break; - case Instruction::LShr: - case Instruction::AShr: Out << " >> "; break; - default: + case Instruction::Add: + case Instruction::FAdd: + Out << " + "; + break; + case Instruction::Sub: + case Instruction::FSub: + Out << " - "; + break; + case Instruction::Mul: + case Instruction::FMul: + Out << " * "; + break; + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + Out << " % "; + break; + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + Out << " / "; + break; + case Instruction::And: + Out << " & "; + break; + case Instruction::Or: + Out << " | "; + break; + case Instruction::Xor: + Out << " ^ "; + break; + case Instruction::Shl: + Out << " << "; + break; + case Instruction::LShr: + case 
Instruction::AShr: + Out << " >> "; + break; + default: #ifndef NDEBUG - errs() << "Invalid operator type!" << opcode; + errs() << "Invalid operator type!" << opcode; #endif - llvm_unreachable(0); + llvm_unreachable(0); } Out << "b"; if (mask) @@ -2618,9 +2903,11 @@ void CWriter::generateHeader(Module &M) { } // Loop over all inline constructors - for (std::set<Type*>::iterator it = CtorDeclTypes.begin(), end = CtorDeclTypes.end(); - it != end; ++it) { - // static __forceinline <u32 x 4> llvm_ctor_u32x4(u32 x1, u32 x2, u32 x3, u32 x4) { + for (std::set<Type *>::iterator it = CtorDeclTypes.begin(), + end = CtorDeclTypes.end(); + it != end; ++it) { + // static __forceinline <u32 x 4> llvm_ctor_u32x4(u32 x1, u32 x2, u32 x3, + // u32 x4) { // Rty r = { // x1, x2, x3, x4 // }; @@ -2634,10 +2921,12 @@ void CWriter::generateHeader(Module &M) { StructType *STy = dyn_cast<StructType>(*it); ArrayType *ATy = dyn_cast<ArrayType>(*it); VectorType *VTy = dyn_cast<VectorType>(*it); - unsigned e = (STy ? STy->getNumElements() : (ATy ? ATy->getNumElements() : VTy->getNumElements())); + unsigned e = (STy ? STy->getNumElements() + : (ATy ? ATy->getNumElements() : VTy->getNumElements())); bool printed = false; for (unsigned i = 0; i != e; ++i) { - Type *ElTy = STy ? STy->getElementType(i) : (*it)->getSequentialElementType(); + Type *ElTy = + STy ? STy->getElementType(i) : (*it)->getSequentialElementType(); if (isEmptyType(ElTy)) Out << " /* "; else if (printed) @@ -2653,7 +2942,8 @@ void CWriter::generateHeader(Module &M) { printTypeName(Out, *it); Out << " r;"; for (unsigned i = 0; i != e; ++i) { - Type *ElTy = STy ? STy->getElementType(i) : (*it)->getSequentialElementType(); + Type *ElTy = + STy ? STy->getElementType(i) : (*it)->getSequentialElementType(); if (isEmptyType(ElTy)) continue; if (STy) @@ -2669,9 +2959,9 @@ void CWriter::generateHeader(Module &M) { } // Emit definitions of the intrinsics. 
- for (SmallVector<Function*, 16>::iterator - I = intrinsicsToDefine.begin(), - E = intrinsicsToDefine.end(); I != E; ++I) { + for (SmallVector<Function *, 16>::iterator I = intrinsicsToDefine.begin(), + E = intrinsicsToDefine.end(); + I != E; ++I) { printIntrinsicDefinition(**I, Out); } @@ -2679,7 +2969,7 @@ void CWriter::generateHeader(Module &M) { Out << "\n\n/* Function Bodies */\n"; } -void CWriter::declareOneGlobalVariable(GlobalVariable* I) { +void CWriter::declareOneGlobalVariable(GlobalVariable *I) { if (I->isDeclaration() || isEmptyType(I->getType()->getPointerElementType())) return; @@ -2701,8 +2991,7 @@ void CWriter::declareOneGlobalVariable(GlobalVariable* I) { Type *ElTy = I->getType()->getElementType(); unsigned Alignment = I->getAlignment(); - bool IsOveraligned = Alignment && - Alignment > TD->getABITypeAlignment(ElTy); + bool IsOveraligned = Alignment && Alignment > TD->getABITypeAlignment(ElTy); // if (IsOveraligned) // Out << "__MSALIGN__(" << Alignment << ") "; printTypeName(Out, ElTy, false) << ' ' << GetValueName(I); @@ -2726,13 +3015,13 @@ void CWriter::declareOneGlobalVariable(GlobalVariable* I) { // and common, so we disable this optimization. // FIXME common linkage should avoid this problem. if (!I->getInitializer()->isNullValue()) { - Out << " = " ; + Out << " = "; writeOperand(I->getInitializer(), ContextStatic); } else if (I->hasWeakLinkage()) { // We have to specify an initializer, but it doesn't have to be // complete. If the value is an aggregate, print out { 0 }, and let // the compiler figure out the rest of the zeros. - Out << " = " ; + Out << " = "; if (I->getInitializer()->getType()->isStructTy() || I->getInitializer()->getType()->isVectorTy()) { Out << "{ 0 }"; @@ -2756,7 +3045,8 @@ void CWriter::printFloatingPointConstants(Function &F) { // precision. 
// for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) - for (Instruction::op_iterator I_Op = I->op_begin(), E_Op = I->op_end(); I_Op != E_Op; ++I_Op) + for (Instruction::op_iterator I_Op = I->op_begin(), E_Op = I->op_end(); + I_Op != E_Op; ++I_Op) if (const Constant *C = dyn_cast<Constant>(I_Op)) printFloatingPointConstants(C); Out << '\n'; @@ -2779,44 +3069,39 @@ void CWriter::printFloatingPointConstants(const Constant *C) { FPConstantMap.count(FPC)) return; - FPConstantMap[FPC] = FPCounter; // Number the FP constants + FPConstantMap[FPC] = FPCounter; // Number the FP constants if (FPC->getType() == Type::getDoubleTy(FPC->getContext())) { double Val = FPC->getValueAPF().convertToDouble(); uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue(); - Out << "const ConstantDoubleTy FPConstant" << FPCounter++ - << " = 0x" << utohexstr(i) - << "ULL; /* " << Val << " */\n"; + Out << "const ConstantDoubleTy FPConstant" << FPCounter++ << " = 0x" + << utohexstr(i) << "ULL; /* " << Val << " */\n"; } else if (FPC->getType() == Type::getFloatTy(FPC->getContext())) { float Val = FPC->getValueAPF().convertToFloat(); - uint32_t i = (uint32_t)FPC->getValueAPF().bitcastToAPInt(). 
- getZExtValue(); - Out << "const ConstantFloatTy FPConstant" << FPCounter++ - << " = 0x" << utohexstr(i) - << "U; /* " << Val << " */\n"; + uint32_t i = (uint32_t)FPC->getValueAPF().bitcastToAPInt().getZExtValue(); + Out << "const ConstantFloatTy FPConstant" << FPCounter++ << " = 0x" + << utohexstr(i) << "U; /* " << Val << " */\n"; } else if (FPC->getType() == Type::getX86_FP80Ty(FPC->getContext())) { // api needed to prevent premature destruction const APInt api = FPC->getValueAPF().bitcastToAPInt(); const uint64_t *p = api.getRawData(); - Out << "const ConstantFP80Ty FPConstant" << FPCounter++ - << " = { 0x" << utohexstr(p[0]) - << "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}" - << "}; /* Long double constant */\n"; + Out << "const ConstantFP80Ty FPConstant" << FPCounter++ << " = { 0x" + << utohexstr(p[0]) << "ULL, 0x" << utohexstr((uint16_t)p[1]) + << ",{0,0,0}" + << "}; /* Long double constant */\n"; } else if (FPC->getType() == Type::getPPC_FP128Ty(FPC->getContext()) || - FPC->getType() == Type::getFP128Ty(FPC->getContext())) { + FPC->getType() == Type::getFP128Ty(FPC->getContext())) { const APInt api = FPC->getValueAPF().bitcastToAPInt(); const uint64_t *p = api.getRawData(); - Out << "const ConstantFP128Ty FPConstant" << FPCounter++ - << " = { 0x" - << utohexstr(p[0]) << ", 0x" << utohexstr(p[1]) - << "}; /* Long double constant */\n"; + Out << "const ConstantFP128Ty FPConstant" << FPCounter++ << " = { 0x" + << utohexstr(p[0]) << ", 0x" << utohexstr(p[1]) + << "}; /* Long double constant */\n"; } else { llvm_unreachable("Unknown float type!"); } } - /// printSymbolTable - Run through symbol table looking for type names. If a /// type name is found, emit its declaration... /// @@ -2830,7 +3115,7 @@ void CWriter::printModuleTypes(raw_ostream &Out) { Out << "} llvmBitCastUnion;\n"; // Keep track of which types have been printed so far. 
- std::set<Type*> TypesPrinted; + std::set<Type *> TypesPrinted; // Loop over all structures then push them into the stack so they are // printed in the correct order. @@ -2839,8 +3124,9 @@ void CWriter::printModuleTypes(raw_ostream &Out) { // forward-declare all structs here first { - std::set<Type*> TypesPrinted; - for (auto it = TypedefDeclTypes.begin(), end = TypedefDeclTypes.end(); it != end; ++it) { + std::set<Type *> TypesPrinted; + for (auto it = TypedefDeclTypes.begin(), end = TypedefDeclTypes.end(); + it != end; ++it) { forwardDeclareStructs(Out, *it, TypesPrinted); } } @@ -2848,31 +3134,35 @@ void CWriter::printModuleTypes(raw_ostream &Out) { // forward-declare all function pointer typedefs (Issue #2) { - std::set<Type*> TypesPrinted; - for (auto it = TypedefDeclTypes.begin(), end = TypedefDeclTypes.end(); it != end; ++it) { + std::set<Type *> TypesPrinted; + for (auto it = TypedefDeclTypes.begin(), end = TypedefDeclTypes.end(); + it != end; ++it) { forwardDeclareFunctionTypedefs(Out, *it, TypesPrinted); } } - Out << "\n/* Types Definitions */\n"; - for (auto it = TypedefDeclTypes.begin(), end = TypedefDeclTypes.end(); it != end; ++it) { + for (auto it = TypedefDeclTypes.begin(), end = TypedefDeclTypes.end(); + it != end; ++it) { printContainedTypes(Out, *it, TypesPrinted); } Out << "\n/* Function definitions */\n"; // Question: Is UnnamedFunctionIDs ever non-empty? 
- for (DenseMap<std::pair<FunctionType*, - std::pair<AttributeList, CallingConv::ID> >, unsigned>::iterator - I = UnnamedFunctionIDs.begin(), E = UnnamedFunctionIDs.end(); - I != E; ++I) { + for (DenseMap< + std::pair<FunctionType *, std::pair<AttributeList, CallingConv::ID>>, + unsigned>::iterator I = UnnamedFunctionIDs.begin(), + E = UnnamedFunctionIDs.end(); + I != E; ++I) { Out << '\n'; - std::pair<FunctionType*, std::pair<AttributeList, CallingConv::ID> > F = I->first; + std::pair<FunctionType *, std::pair<AttributeList, CallingConv::ID>> F = + I->first; if (F.second.first == AttributeList() && F.second.second == CallingConv::C) - if (!TypesPrinted.insert(F.first).second) continue; // already printed this above + if (!TypesPrinted.insert(F.first).second) + continue; // already printed this above // FIXME: Removing apparently unused function call - need to check printFunctionDeclaration(Out, F.first, F.second); @@ -2880,9 +3170,9 @@ void CWriter::printModuleTypes(raw_ostream &Out) { // We may have collected some intrinsic prototypes to emit. 
// Emit them now, before the function that uses them is emitted - for (std::vector<Function*>::iterator - I = prototypesToGen.begin(), E = prototypesToGen.end(); - I != E; ++I) { + for (std::vector<Function *>::iterator I = prototypesToGen.begin(), + E = prototypesToGen.end(); + I != E; ++I) { Out << '\n'; Function *F = *I; printFunctionProto(Out, F); @@ -2890,9 +3180,12 @@ void CWriter::printModuleTypes(raw_ostream &Out) { } } -void CWriter::forwardDeclareStructs(raw_ostream &Out, Type *Ty, std::set<Type*> &TypesPrinted) { - if (!TypesPrinted.insert(Ty).second) return; - if (isEmptyType(Ty)) return; +void CWriter::forwardDeclareStructs(raw_ostream &Out, Type *Ty, + std::set<Type *> &TypesPrinted) { + if (!TypesPrinted.insert(Ty).second) + return; + if (isEmptyType(Ty)) + return; for (auto I = Ty->subtype_begin(); I != Ty->subtype_end(); ++I) { forwardDeclareStructs(Out, *I, TypesPrinted); @@ -2903,9 +3196,12 @@ void CWriter::forwardDeclareStructs(raw_ostream &Out, Type *Ty, std::set<Type*> } } -void CWriter::forwardDeclareFunctionTypedefs(raw_ostream &Out, Type *Ty, std::set<Type*> &TypesPrinted) { - if (!TypesPrinted.insert(Ty).second) return; - if (isEmptyType(Ty)) return; +void CWriter::forwardDeclareFunctionTypedefs(raw_ostream &Out, Type *Ty, + std::set<Type *> &TypesPrinted) { + if (!TypesPrinted.insert(Ty).second) + return; + if (isEmptyType(Ty)) + return; for (auto I = Ty->subtype_begin(); I != Ty->subtype_end(); ++I) { forwardDeclareFunctionTypedefs(Out, *I, TypesPrinted); @@ -2920,15 +3216,17 @@ void CWriter::forwardDeclareFunctionTypedefs(raw_ostream &Out, Type *Ty, std::se // this one depends on. // void CWriter::printContainedTypes(raw_ostream &Out, Type *Ty, - std::set<Type*> &TypesPrinted) { + std::set<Type *> &TypesPrinted) { // Check to see if we have already printed this struct. 
- if (!TypesPrinted.insert(Ty).second) return; + if (!TypesPrinted.insert(Ty).second) + return; // Skip empty structs - if (isEmptyType(Ty)) return; + if (isEmptyType(Ty)) + return; // Print all contained types first. - for (Type::subtype_iterator I = Ty->subtype_begin(), - E = Ty->subtype_end(); I != E; ++I) + for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); + I != E; ++I) printContainedTypes(Out, *I, TypesPrinted); if (StructType *ST = dyn_cast<StructType>(Ty)) { @@ -2949,21 +3247,22 @@ static inline bool isFPIntBitCast(Instruction &I) { Type *SrcTy = I.getOperand(0)->getType(); Type *DstTy = I.getType(); return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) || - (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy()); + (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy()); } void CWriter::printFunction(Function &F) { bool isKernel = false; - if (NamedMDNode * KernelMD = F.getParent()->getNamedMetadata("opencl.kernels")) { + if (NamedMDNode *KernelMD = + F.getParent()->getNamedMetadata("opencl.kernels")) { for (auto iter : KernelMD->operands()) { - DEBUG( errs() << "Kernel Metadata: " << *iter << "\n"); + DEBUG(errs() << "Kernel Metadata: " << *iter << "\n"); const MDOperand *KernelMDOp = iter->operands().begin(); Metadata *KMD = KernelMDOp->get(); - if(ValueAsMetadata *KMDVAM = dyn_cast<ValueAsMetadata>(KMD)){ + if (ValueAsMetadata *KMDVAM = dyn_cast<ValueAsMetadata>(KMD)) { Value *KMDVal = KMDVAM->getValue(); Function *KMDFunc = dyn_cast<Function>(KMDVal); - if(KMDFunc == &F) { + if (KMDFunc == &F) { DEBUG(errs() << "-->Kernel Func: " << KMDFunc->getName() << "\n"); isKernel = true; } @@ -2975,12 +3274,15 @@ void CWriter::printFunction(Function &F) { bool isStructReturn = F.hasStructRetAttr(); assert(!F.isDeclaration()); - if (F.hasDLLImportStorageClass()) Out << "__declspec(dllimport) "; - if (F.hasDLLExportStorageClass()) Out << "__declspec(dllexport) "; - if (F.hasLocalLinkage()) Out << "static "; - printFunctionProto(Out, 
F.getFunctionType(), - std::make_pair(F.getAttributes(), F.getCallingConv()), - GetValueName(&F), + if (F.hasDLLImportStorageClass()) + Out << "__declspec(dllimport) "; + if (F.hasDLLExportStorageClass()) + Out << "__declspec(dllexport) "; + if (F.hasLocalLinkage()) + Out << "static "; + printFunctionProto( + Out, F.getFunctionType(), + std::make_pair(F.getAttributes(), F.getCallingConv()), GetValueName(&F), F.arg_begin(), // NOTE: replacing ArgumentList (LLVM-4) with arg iterator //&F.getArgumentList(), isKernel); @@ -2990,16 +3292,17 @@ void CWriter::printFunction(Function &F) { // If this is a struct return function, handle the result with magic. if (isStructReturn) { Type *StructTy = - cast<PointerType>(F.arg_begin()->getType())->getElementType(); + cast<PointerType>(F.arg_begin()->getType())->getElementType(); Out << " "; - printTypeName(Out, StructTy, false) << " StructReturn; /* Struct return temporary */\n"; + printTypeName(Out, StructTy, false) + << " StructReturn; /* Struct return temporary */\n"; Out << " "; printTypeName(Out, F.arg_begin()->getType(), false); Out << GetValueName(&*F.arg_begin()) << " = &StructReturn;\n"; } - - // Output all floating point constants that cannot be printed accurately. + + // Output all floating point constants that cannot be printed accurately. 
printFloatingPointConstants(F); bool PrintedVar = false; @@ -3009,8 +3312,8 @@ void CWriter::printFunction(Function &F) { if (AllocaInst *AI = isDirectAlloca(&*I)) { DEBUG(errs() << "Processing alloca inst: " << *AI << "\n"); unsigned Alignment = AI->getAlignment(); - bool IsOveraligned = Alignment && - Alignment > TD->getABITypeAlignment(AI->getAllocatedType()); + bool IsOveraligned = Alignment && Alignment > TD->getABITypeAlignment( + AI->getAllocatedType()); Out << " "; // if (IsOveraligned) // Out << "__MSALIGN__(" << Alignment << ") "; @@ -3020,20 +3323,21 @@ void CWriter::printFunction(Function &F) { Out << " __attribute__((aligned(" << Alignment << ")))"; if (AI->isArrayAllocation()) { DEBUG(errs() << "Alloca is an array allocation!\n"); - unsigned arraySize = dyn_cast<ConstantInt>(AI->getArraySize())->getZExtValue(); + unsigned arraySize = + dyn_cast<ConstantInt>(AI->getArraySize())->getZExtValue(); Out << "[" << arraySize << "]"; } Out << "; /* Address-exposed local */\n"; PrintedVar = true; - } else if (!isEmptyType(I->getType()) && - !isInlinableInst(*I)) { + } else if (!isEmptyType(I->getType()) && !isInlinableInst(*I)) { Out << " "; printTypeName(Out, I->getType(), false) << ' ' << GetValueName(&*I); Out << ";\n"; - if (isa<PHINode>(*I)) { // Print out PHI node temporaries as well... + if (isa<PHINode>(*I)) { // Print out PHI node temporaries as well... Out << " "; - printTypeName(Out, I->getType(), false) << ' ' << (GetValueName(&*I)+"__PHI_TEMPORARY"); + printTypeName(Out, I->getType(), false) + << ' ' << (GetValueName(&*I) + "__PHI_TEMPORARY"); Out << ";\n"; } PrintedVar = true; @@ -3043,7 +3347,7 @@ void CWriter::printFunction(Function &F) { // variable to hold the result of the BitCast. 
if (isFPIntBitCast(*I)) { Out << " llvmBitCastUnion " << GetValueName(&*I) - << "__BITCAST_TEMPORARY;\n"; + << "__BITCAST_TEMPORARY;\n"; PrintedVar = true; } } @@ -3054,11 +3358,13 @@ void CWriter::printFunction(Function &F) { // print the basic blocks // for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - std::set<BasicBlock*> VisitSet; - BasicBlock* entry = &(F.getEntryBlock()); - // starting printing from entry, then CFG traversal will print the reachable blocks. + std::set<BasicBlock *> VisitSet; + BasicBlock *entry = &(F.getEntryBlock()); + // starting printing from entry, then CFG traversal will print the reachable + // blocks. printBBorLoop(entry); - // for (df_iterator<BasicBlock*> BI = df_begin(entry), BE = df_end(entry); BI!=BE; ++BI) { + // for (df_iterator<BasicBlock*> BI = df_begin(entry), BE = df_end(entry); + // BI!=BE; ++BI) { // BasicBlock *BB = *BI; // printBBorLoop(BB); // if(VisitedBlocks.find(BB) == VisitedBlocks.end()) { @@ -3075,29 +3381,28 @@ void CWriter::printFunction(Function &F) { Out << "}\n\n"; } - -bool CWriter::extractIndVarChain(Instruction *Inst, std::stack<Instruction*> *IndVarChain, Instruction *Branch, unsigned indent) { - //Traverse def-use chain of induction variable to make sure that - //it ends at the branch. Keep stack of all instructions leading there. - for(User *U : Inst->users()) { +bool CWriter::extractIndVarChain(Instruction *Inst, + std::stack<Instruction *> *IndVarChain, + Instruction *Branch, unsigned indent) { + // Traverse def-use chain of induction variable to make sure that + // it ends at the branch. Keep stack of all instructions leading there. 
+ for (User *U : Inst->users()) { DEBUG(errs() << std::string(indent, '-')); DEBUG(errs() << "->Found user: " << *U << "\n"); - if(Instruction *UInst = dyn_cast<Instruction>(U)) { - if(UInst == Branch) { + if (Instruction *UInst = dyn_cast<Instruction>(U)) { + if (UInst == Branch) { DEBUG(errs() << "Found correct path, returning!\n"); return true; - } - else if (isa<PHINode>(UInst)) { + } else if (isa<PHINode>(UInst)) { DEBUG(errs() << "Reached a PHI Node => Wrong path! Returning!\n"); continue; - } - else { + } else { IndVarChain->push(UInst); - if(extractIndVarChain(UInst, IndVarChain, Branch, indent+2)) { + if (extractIndVarChain(UInst, IndVarChain, Branch, indent + 2)) { return true; - } - else { - DEBUG(errs() << "Wrong path, popping: " << *(IndVarChain->top()) << "\n"); + } else { + DEBUG(errs() << "Wrong path, popping: " << *(IndVarChain->top()) + << "\n"); IndVarChain->pop(); } } @@ -3107,53 +3412,60 @@ bool CWriter::extractIndVarChain(Instruction *Inst, std::stack<Instruction*> *In return false; } -bool CWriter::findLoopBranch(BranchInst **LBranch, BasicBlock* CurBlock, BasicBlock* LHeader, std::set<BasicBlock*>*visitSet) { +bool CWriter::findLoopBranch(BranchInst **LBranch, BasicBlock *CurBlock, + BasicBlock *LHeader, + std::set<BasicBlock *> *visitSet) { bool result = false; DEBUG(errs() << "Finding loop branch in " << CurBlock->getName() << "!\n"); - if(BranchInst *LBranchTemp = dyn_cast<BranchInst>(CurBlock->getTerminator())) { + if (BranchInst *LBranchTemp = + dyn_cast<BranchInst>(CurBlock->getTerminator())) { DEBUG(errs() << "Branch: " << *LBranchTemp << "\n"); - if(LBranchTemp->isConditional()) { - if(LBranchTemp->getSuccessor(0) == LHeader || LBranchTemp->getSuccessor(1) == LHeader) { + if (LBranchTemp->isConditional()) { + if (LBranchTemp->getSuccessor(0) == LHeader || + LBranchTemp->getSuccessor(1) == LHeader) { *LBranch = LBranchTemp; DEBUG(errs() << "Found Loop branch: " << **LBranch << "\n"); result = true; } else { - BasicBlock* NextBlock1 = 
LBranchTemp->getSuccessor(0); - BasicBlock* NextBlock2 = LBranchTemp->getSuccessor(1); - if(visitSet->find(NextBlock1) == visitSet->end()) { - DEBUG(errs() << "Visiting unvisited node: " << NextBlock1->getName() << "\n"); + BasicBlock *NextBlock1 = LBranchTemp->getSuccessor(0); + BasicBlock *NextBlock2 = LBranchTemp->getSuccessor(1); + if (visitSet->find(NextBlock1) == visitSet->end()) { + DEBUG(errs() << "Visiting unvisited node: " << NextBlock1->getName() + << "\n"); visitSet->insert(NextBlock1); result |= findLoopBranch(LBranch, NextBlock1, LHeader, visitSet); } - if(visitSet->find(NextBlock2) == visitSet->end()) { - DEBUG(errs() << "Visiting unvisited node: " << NextBlock2->getName() << "\n"); + if (visitSet->find(NextBlock2) == visitSet->end()) { + DEBUG(errs() << "Visiting unvisited node: " << NextBlock2->getName() + << "\n"); visitSet->insert(NextBlock2); result |= findLoopBranch(LBranch, NextBlock2, LHeader, visitSet); } } } else { - if(LBranchTemp->getSuccessor(0) == LHeader) { + if (LBranchTemp->getSuccessor(0) == LHeader) { *LBranch = LBranchTemp; DEBUG(errs() << "Found Loop branch: " << **LBranch << "\n"); result = true; } else { BasicBlock *NextBlock = LBranchTemp->getSuccessor(0); - if(visitSet->find(NextBlock) == visitSet->end()) { - DEBUG(errs() << "Visiting unvisited node: " << NextBlock->getName() << "\n"); + if (visitSet->find(NextBlock) == visitSet->end()) { + DEBUG(errs() << "Visiting unvisited node: " << NextBlock->getName() + << "\n"); visitSet->insert(NextBlock); result |= findLoopBranch(LBranch, NextBlock, LHeader, visitSet); } } } } - return result; + return result; } bool CWriter::traverseUseDefChain(Instruction *I, PHINode *PI) { DEBUG(errs() << "traversing: " << *I << "\n"); bool result = false; - if(PHINode *PHI = dyn_cast<PHINode>(I)) { + if (PHINode *PHI = dyn_cast<PHINode>(I)) { if (PI == PHI) { DEBUG(errs() << "returning true\n"); result = true; @@ -3164,9 +3476,9 @@ bool CWriter::traverseUseDefChain(Instruction *I, PHINode *PI) { 
} } else { for (Use &U : I->operands()) { - if(Instruction *UInst = dyn_cast<Instruction>(U)) { + if (Instruction *UInst = dyn_cast<Instruction>(U)) { result |= traverseUseDefChain(UInst, PI); - } + } } } return result; @@ -3178,1716 +3490,1839 @@ void CWriter::printLoop(Loop *L) { Out << "\n\n/* Processing Loop Block: " << L->getName() << " */\n"; DEBUG(errs() << "\n\n/* Processing Loop Block: " << L->getName() << " */\n"); - PHINode *InductionVariable; // auto *LoopLatch = L->getLoopLatch(); InductionDescriptor ID; DEBUG(errs() << "Looking for induction variables\n"); bool found = false; if (PHINode *IndVar = L->getCanonicalInductionVariable()) { - InductionVariable = IndVar; - found = true; - DEBUG(errs() << "Found canonical induction variable:\n" << *IndVar << "\n"); - } else { - for (auto I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { - PHINode *PHI = cast<PHINode>(I); - DEBUG(errs() << "Phi Node: " << *PHI << "\n"); - if(InductionDescriptor::isInductionPHI(PHI,L,PSE,ID)) { - DEBUG(errs() << "Found induction: " << *PHI << "\n"); - InductionVariable = PHI; - found = true; - break; - } - } - } + InductionVariable = IndVar; + found = true; + DEBUG(errs() << "Found canonical induction variable:\n" << *IndVar << "\n"); + } else { + for (auto I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { + PHINode *PHI = cast<PHINode>(I); + DEBUG(errs() << "Phi Node: " << *PHI << "\n"); + if (InductionDescriptor::isInductionPHI(PHI, L, PSE, ID)) { + DEBUG(errs() << "Found induction: " << *PHI << "\n"); + InductionVariable = PHI; + found = true; + break; + } + } + } - if(!found) { - llvm_unreachable("Couldn't find induction Variable in loop!\n"); - } + if (!found) { + llvm_unreachable("Couldn't find induction Variable in loop!\n"); + } - LInductionVars.insert(InductionVariable); - LoopIndVarsMap.insert(std::pair<Loop*, PHINode*>(L,InductionVariable)); - - Value *IV = dyn_cast<Value>(InductionVariable); - std::string IVName = GetValueName(IV); - - 
Optional<Loop::LoopBounds> OLB = L->getBounds(*SE); - if(OLB.hasValue()) { - Loop::LoopBounds LB = OLB.getValue(); - Value *StartValue = &(LB.getInitialIVValue()); - Instruction *StepInstruction = &(LB.getStepInst()); - Value *StepValue = LB.getStepValue(); - Value *FinalValue = &(LB.getFinalIVValue()); - ICmpInst::Predicate LoopPredicate = LB.getCanonicalPredicate(); - std::string BranchPredicate; - switch(LoopPredicate) { - case ICmpInst::ICMP_EQ: BranchPredicate = " == "; break; - case ICmpInst::ICMP_NE: BranchPredicate = " != "; break; - case ICmpInst::ICMP_ULE: - case ICmpInst::ICMP_SLE: BranchPredicate = " < "; break; - case ICmpInst::ICMP_UGE: - case ICmpInst::ICMP_SGE: BranchPredicate = " > "; break; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: BranchPredicate = " <= "; break; - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: BranchPredicate = " >= "; break; - default: llvm_unreachable("Illegal ICmp predicate"); - } - DEBUG( - errs() << "Found a Loop Bounds Object!\n"; - errs() << "IV: " << *IV<< "\n"; - errs() << "StartValue: " << *StartValue<< "\n"; - errs() << "StepInstruction: " << *StepInstruction<< "\n"; - errs() << "StepValue: " << *StepValue<< "\n"; - errs() << "FinalValue: " << *FinalValue<< "\n"; - errs() << "Branch Predicate: " << BranchPredicate<< "\n"; - errs() << "Direction: " << ((LB.getDirection() == Loop::LoopBounds::Direction::Increasing) ? 
"increasing" : "decreasing") << "\n"; - ) - - std::string startStr; - if (ConstantInt *startConst = dyn_cast<ConstantInt>(StartValue)) { - startStr = std::to_string(startConst->getSExtValue()); - } else { - startStr = GetValueName(StartValue); - } - std::string finalStr; - if (ConstantInt *finalConst = dyn_cast<ConstantInt>(FinalValue)) { - finalStr = std::to_string(finalConst->getSExtValue()); - } else { - finalStr = GetValueName(FinalValue); - } - std::string stepStr; - if (ConstantInt *stepConst = dyn_cast<ConstantInt>(StepValue)) { - stepStr = std::to_string(stepConst->getSExtValue()); - } else { - stepStr = GetValueName(StepValue); - } - - DEBUG( - errs() << "\n for ( " << IVName << " = " << startStr << "; " - << IVName << BranchPredicate << finalStr << "; " - << IVName << " = " << IVName << " + " << stepStr << ") {\n"; - ) + LInductionVars.insert(InductionVariable); + LoopIndVarsMap.insert(std::pair<Loop *, PHINode *>(L, InductionVariable)); + + Value *IV = dyn_cast<Value>(InductionVariable); + std::string IVName = GetValueName(IV); + + Optional<Loop::LoopBounds> OLB = L->getBounds(*SE); + if (OLB.hasValue()) { + Loop::LoopBounds LB = OLB.getValue(); + Value *StartValue = &(LB.getInitialIVValue()); + Instruction *StepInstruction = &(LB.getStepInst()); + Value *StepValue = LB.getStepValue(); + Value *FinalValue = &(LB.getFinalIVValue()); + ICmpInst::Predicate LoopPredicate = LB.getCanonicalPredicate(); + std::string BranchPredicate; + switch (LoopPredicate) { + case ICmpInst::ICMP_EQ: + BranchPredicate = " == "; + break; + case ICmpInst::ICMP_NE: + BranchPredicate = " != "; + break; + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_SLE: + BranchPredicate = " < "; + break; + case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_SGE: + BranchPredicate = " > "; + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: + BranchPredicate = " <= "; + break; + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + BranchPredicate = " >= "; + break; + default: + 
llvm_unreachable("Illegal ICmp predicate"); + } + DEBUG(errs() << "Found a Loop Bounds Object!\n"; + errs() << "IV: " << *IV << "\n"; + errs() << "StartValue: " << *StartValue << "\n"; + errs() << "StepInstruction: " << *StepInstruction << "\n"; + errs() << "StepValue: " << *StepValue << "\n"; + errs() << "FinalValue: " << *FinalValue << "\n"; + errs() << "Branch Predicate: " << BranchPredicate << "\n"; + errs() << "Direction: " + << ((LB.getDirection() == + Loop::LoopBounds::Direction::Increasing) + ? "increasing" + : "decreasing") + << "\n";) + + std::string startStr; + if (ConstantInt *startConst = dyn_cast<ConstantInt>(StartValue)) { + startStr = std::to_string(startConst->getSExtValue()); + } else { + startStr = GetValueName(StartValue); + } + std::string finalStr; + if (ConstantInt *finalConst = dyn_cast<ConstantInt>(FinalValue)) { + finalStr = std::to_string(finalConst->getSExtValue()); + } else { + finalStr = GetValueName(FinalValue); + } + std::string stepStr; + if (ConstantInt *stepConst = dyn_cast<ConstantInt>(StepValue)) { + stepStr = std::to_string(stepConst->getSExtValue()); + } else { + stepStr = GetValueName(StepValue); + } - Out << "\n for ( " << IVName << " = " << startStr << "; " - << IVName << BranchPredicate << finalStr << "; " - << IVName << " = " << IVName << " + " << stepStr << ") {\n"; + DEBUG(errs() << "\n for ( " << IVName << " = " << startStr << "; " + << IVName << BranchPredicate << finalStr << "; " << IVName + << " = " << IVName << " + " << stepStr << ") {\n";) - } else { - llvm_unreachable("No Loop Bounds!"); - DEBUG(errs() << "could not find a loop bounds object, searching for bounds manually!\n"); + Out << "\n for ( " << IVName << " = " << startStr << "; " << IVName + << BranchPredicate << finalStr << "; " << IVName << " = " << IVName + << " + " << stepStr << ") {\n"; + + } else { + llvm_unreachable("No Loop Bounds!"); + DEBUG(errs() << "could not find a loop bounds object, searching for bounds " + "manually!\n"); auto *ExitingBlock 
= L->getExitingBlock(); DEBUG(errs() << "Exiting Block: " << ExitingBlock->getName() << "\n"); auto *ExitingBranch = ExitingBlock->getTerminator(); DEBUG(errs() << "Exiting Branch: " << *ExitingBranch << "\n"); - Value *StartValue = ID.getStartValue(); - const SCEV *Step = ID.getStep(); - // unsigned IterationCount = SE->getSmallConstantMaxTripCount(L); - - std::string IVOp; - - if (const SCEVConstant *stepConst = dyn_cast<SCEVConstant>(Step)) { - if(stepConst->getAPInt().isNonNegative()) { - IVOp = " + "; - } - } + Value *StartValue = ID.getStartValue(); + const SCEV *Step = ID.getStep(); + // unsigned IterationCount = SE->getSmallConstantMaxTripCount(L); + std::string IVOp; - std::string BranchPredicate; - ICmpInst *BranchCondition = dyn_cast<ICmpInst>(dyn_cast<BranchInst>(ExitingBranch)->getCondition()); - switch(BranchCondition->getPredicate()) { - case ICmpInst::ICMP_EQ: BranchPredicate = " != "; break; - case ICmpInst::ICMP_NE: BranchPredicate = " == "; break; - case ICmpInst::ICMP_ULE: - case ICmpInst::ICMP_SLE: BranchPredicate = " > "; break; - case ICmpInst::ICMP_UGE: - case ICmpInst::ICMP_SGE: BranchPredicate = " < "; break; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: BranchPredicate = " >= "; break; - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: BranchPredicate = " <= "; break; - default: llvm_unreachable("Illegal ICmp predicate"); - } + if (const SCEVConstant *stepConst = dyn_cast<SCEVConstant>(Step)) { + if (stepConst->getAPInt().isNonNegative()) { + IVOp = " + "; + } + } - DEBUG(errs() << "Branch Condition: " << *BranchCondition << "\n"); - - std::string compLHS, compRHS; - Value *CondOp1 = BranchCondition->getOperand(0); - DEBUG(errs() << "CondOp1: " << *CondOp1 << "\n"); - if (Constant *constOp1 = dyn_cast<Constant>(CondOp1)) { - DEBUG(errs() << "Condition Operand is a constant, inserting it as is.\n"); - compLHS = (constOp1->getUniqueInteger()).toString(10,1); - } else { - DEBUG(errs() << "Condition Operand is not a constant, "); - 
if(traverseUseDefChain(dyn_cast<Instruction>(CondOp1), InductionVariable)) { - DEBUG(errs() << "it is the IV.\n"); - compLHS = GetValueName(IV); - } else { - DEBUG(errs() << "it is another variable.\n"); - compLHS = GetValueName(CondOp1); - } - } - Value *CondOp2 = BranchCondition->getOperand(1); - DEBUG(errs() << "CondOp2: " << *CondOp2 << "\n"); - if (Constant *constOp2 = dyn_cast<Constant>(CondOp2)) { - DEBUG(errs() << "Condition Operand is a constant, inserting it as is.\n"); - compRHS = (constOp2->getUniqueInteger()).toString(10,1); - } else { - DEBUG(errs() << "Condition Operand is not a constant.\n"); - if(traverseUseDefChain(dyn_cast<Instruction>(CondOp2), InductionVariable)) { - DEBUG(errs() << "It is the IV.\n"); - compRHS = GetValueName(IV); - } else { - DEBUG(errs() << "It is another variable.\n"); - compRHS = GetValueName(CondOp2); - } - } + std::string BranchPredicate; + ICmpInst *BranchCondition = + dyn_cast<ICmpInst>(dyn_cast<BranchInst>(ExitingBranch)->getCondition()); + switch (BranchCondition->getPredicate()) { + case ICmpInst::ICMP_EQ: + BranchPredicate = " != "; + break; + case ICmpInst::ICMP_NE: + BranchPredicate = " == "; + break; + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_SLE: + BranchPredicate = " > "; + break; + case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_SGE: + BranchPredicate = " < "; + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: + BranchPredicate = " >= "; + break; + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + BranchPredicate = " <= "; + break; + default: + llvm_unreachable("Illegal ICmp predicate"); + } - std::string startStr; - if (Constant *startConst = dyn_cast<Constant>(StartValue)) { - startStr = (startConst->getUniqueInteger()).toString(10,1); - } else { - startStr = GetValueName(StartValue); - } + DEBUG(errs() << "Branch Condition: " << *BranchCondition << "\n"); + std::string compLHS, compRHS; + Value *CondOp1 = BranchCondition->getOperand(0); + DEBUG(errs() << "CondOp1: " << *CondOp1 << 
"\n"); + if (Constant *constOp1 = dyn_cast<Constant>(CondOp1)) { + DEBUG(errs() << "Condition Operand is a constant, inserting it as is.\n"); + compLHS = (constOp1->getUniqueInteger()).toString(10, 1); + } else { + DEBUG(errs() << "Condition Operand is not a constant, "); + if (traverseUseDefChain(dyn_cast<Instruction>(CondOp1), + InductionVariable)) { + DEBUG(errs() << "it is the IV.\n"); + compLHS = GetValueName(IV); + } else { + DEBUG(errs() << "it is another variable.\n"); + compLHS = GetValueName(CondOp1); + } + } + Value *CondOp2 = BranchCondition->getOperand(1); + DEBUG(errs() << "CondOp2: " << *CondOp2 << "\n"); + if (Constant *constOp2 = dyn_cast<Constant>(CondOp2)) { + DEBUG(errs() << "Condition Operand is a constant, inserting it as is.\n"); + compRHS = (constOp2->getUniqueInteger()).toString(10, 1); + } else { + DEBUG(errs() << "Condition Operand is not a constant.\n"); + if (traverseUseDefChain(dyn_cast<Instruction>(CondOp2), + InductionVariable)) { + DEBUG(errs() << "It is the IV.\n"); + compRHS = GetValueName(IV); + } else { + DEBUG(errs() << "It is another variable.\n"); + compRHS = GetValueName(CondOp2); + } + } - DEBUG(errs() << " for ( " << IVName << " = " << startStr << "; " - << compLHS << BranchPredicate << compRHS << "; " - << IVName << " = " << IVName << IVOp << *Step << ") {\n"); + std::string startStr; + if (Constant *startConst = dyn_cast<Constant>(StartValue)) { + startStr = (startConst->getUniqueInteger()).toString(10, 1); + } else { + startStr = GetValueName(StartValue); + } - Out << "\n for ( " << IVName << " = " << startStr << "; " - << compLHS << BranchPredicate << compRHS << "; " - << IVName << " = " << IVName << IVOp << *Step << ") {\n"; - } + DEBUG(errs() << " for ( " << IVName << " = " << startStr << "; " << compLHS + << BranchPredicate << compRHS << "; " << IVName << " = " + << IVName << IVOp << *Step << ") {\n"); + Out << "\n for ( " << IVName << " = " << startStr << "; " << compLHS + << BranchPredicate << compRHS << "; " << 
IVName << " = " << IVName + << IVOp << *Step << ") {\n"; + } - BasicBlock *BB = L->getHeader(); - // printBBorLoop(BB); - printBasicBlock(BB); - // Loop *BBLoop = LI->getLoopFor(BB); - // if (BBLoop == L) - // printBasicBlock(BB); - // else if (BB == BBLoop->getHeader() && BBLoop->getParentLoop() == L) - // printLoop(BBLoop); - - // Out << " do { /* Syntactic loop '" << L->getHeader()->getName() - // << "' to make GCC happy */\n"; - // for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i) { - // BasicBlock *BB = L->getBlocks()[i]; - // Loop *BBLoop = LI->getLoopFor(BB); - // if (BBLoop == L) - // printBasicBlock(BB); - // else if (BB == BBLoop->getHeader() && BBLoop->getParentLoop() == L) - // printLoop(BBLoop); - // } - // Out << " } \n"; + BasicBlock *BB = L->getHeader(); + // printBBorLoop(BB); + printBasicBlock(BB); + // Loop *BBLoop = LI->getLoopFor(BB); + // if (BBLoop == L) + // printBasicBlock(BB); + // else if (BB == BBLoop->getHeader() && BBLoop->getParentLoop() == L) + // printLoop(BBLoop); + + // Out << " do { /* Syntactic loop '" << L->getHeader()->getName() + // << "' to make GCC happy */\n"; + // for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i) { + // BasicBlock *BB = L->getBlocks()[i]; + // Loop *BBLoop = LI->getLoopFor(BB); + // if (BBLoop == L) + // printBasicBlock(BB); + // else if (BB == BBLoop->getHeader() && BBLoop->getParentLoop() == L) + // printLoop(BBLoop); + // } + // Out << " } \n"; } void CWriter::printBasicBlock(BasicBlock *BB) { - DEBUG(errs() << "\n\nProcessing Basic Block: " << BB->getName() << "\n"); - Out << "\n\n/* Processing Basic Block: " << BB->getName() << " */\n"; - - // Don't print the label for the basic block if there are no uses, or if - // the only terminator use is the predecessor basic block's terminator. - // We have to scan the use list because PHI nodes use basic blocks too but - // do not require a label to be generated. 
- // - bool NeedsLabel = false; - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - if (isGotoCodeNecessary(*PI, BB)) { - NeedsLabel = true; - break; - } + DEBUG(errs() << "\n\nProcessing Basic Block: " << BB->getName() << "\n"); + Out << "\n\n/* Processing Basic Block: " << BB->getName() << " */\n"; - // if (NeedsLabel) Out << "/* " << GetValueName(BB) << ": */\n"; - Out << "/* " << GetValueName(BB) << ": */\n"; - - // Output all of the instructions in the basic block... - for (BasicBlock::iterator II = BB->begin(), E = --BB->end(); II != E; - ++II) { - Instruction *I = &*II; - DEBUG(errs() << "*********Processing: " << *I << "\n"); - bool skip = false; - for(Use &U : I->operands()) { - Value *v = U.get(); - if(PHINode *PN = dyn_cast<PHINode>(v)) { - if (LInductionVars.find(PN) != LInductionVars.end()) { - bool UserPHI = false; - bool UserCMP = false; - bool UserOTHER = false; - DEBUG(errs() << "Instruction uses induction variable\n"); - for (User *IUser : I->users()) { - if (Instruction *UserInst = dyn_cast<Instruction>(IUser)) { - DEBUG(errs() << "User: " << *UserInst << "\n"); - if (dyn_cast<PHINode>(UserInst)) { - UserPHI = true; - } else if (dyn_cast<ICmpInst>(UserInst)) { - UserCMP = true; - } else { - UserOTHER = true; - } - // skip = true; - // break; - } - } - if (UserPHI && UserCMP && !UserOTHER) { - skip = true; - } - } - } - if (skip) - break; - } - if(skip){ - DEBUG(errs() << "Skipping instruction that increments Induction Variable!\n"); - Out << "/* Skipped induction variable use: " << *I << " */\n"; - continue; - } - if(PHINode *PN = dyn_cast<PHINode>(I)) { - if (LInductionVars.find(PN) != LInductionVars.end()) { - DEBUG(errs() << "Skipping PHINode for Induction Variable!\n"); - Out << "/* PHINode of induction variable was here */\n"; - continue; - } - } - if (!isInlinableInst(*II) && !isDirectAlloca(&*II)) { - if (!isEmptyType(II->getType()) && - !isInlineAsm(*II)) - outputLValue(&*II); - else - Out << " "; - 
writeInstComputationInline(*II); - Out << ";\n"; - } else { - DEBUG(errs() << "Skipping inlinable or direct alloca!\n"); - } - } + // Don't print the label for the basic block if there are no uses, or if + // the only terminator use is the predecessor basic block's terminator. + // We have to scan the use list because PHI nodes use basic blocks too but + // do not require a label to be generated. + // + bool NeedsLabel = false; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + if (isGotoCodeNecessary(*PI, BB)) { + NeedsLabel = true; + break; + } - // Don't emit prefix or suffix for the terminator. - visit(*BB->getTerminator()); -} + // if (NeedsLabel) Out << "/* " << GetValueName(BB) << ": */\n"; + Out << "/* " << GetValueName(BB) << ": */\n"; + + // Output all of the instructions in the basic block... + for (BasicBlock::iterator II = BB->begin(), E = --BB->end(); II != E; ++II) { + Instruction *I = &*II; + DEBUG(errs() << "*********Processing: " << *I << "\n"); + bool skip = false; + for (Use &U : I->operands()) { + Value *v = U.get(); + if (PHINode *PN = dyn_cast<PHINode>(v)) { + if (LInductionVars.find(PN) != LInductionVars.end()) { + bool UserPHI = false; + bool UserCMP = false; + bool UserOTHER = false; + DEBUG(errs() << "Instruction uses induction variable\n"); + for (User *IUser : I->users()) { + if (Instruction *UserInst = dyn_cast<Instruction>(IUser)) { + DEBUG(errs() << "User: " << *UserInst << "\n"); + if (dyn_cast<PHINode>(UserInst)) { + UserPHI = true; + } else if (dyn_cast<ICmpInst>(UserInst)) { + UserCMP = true; + } else { + UserOTHER = true; + } + // skip = true; + // break; + } + } + if (UserPHI && UserCMP && !UserOTHER) { + skip = true; + } + } + } + if (skip) + break; + } + if (skip) { + DEBUG(errs() + << "Skipping instruction that increments Induction Variable!\n"); + Out << "/* Skipped induction variable use: " << *I << " */\n"; + continue; + } + if (PHINode *PN = dyn_cast<PHINode>(I)) { + if (LInductionVars.find(PN) 
!= LInductionVars.end()) { + DEBUG(errs() << "Skipping PHINode for Induction Variable!\n"); + Out << "/* PHINode of induction variable was here */\n"; + continue; + } + } + if (!isInlinableInst(*II) && !isDirectAlloca(&*II)) { + if (!isEmptyType(II->getType()) && !isInlineAsm(*II)) + outputLValue(&*II); + else + Out << " "; + writeInstComputationInline(*II); + Out << ";\n"; + } else { + DEBUG(errs() << "Skipping inlinable or direct alloca!\n"); + } + } + // Don't emit prefix or suffix for the terminator. + visit(*BB->getTerminator()); +} // Specific Instruction type classes... note that all of the casts are // necessary because we use the instruction classes as opaque types... // void CWriter::visitReturnInst(ReturnInst &I) { - // If this is a struct return function, return the temporary struct. - bool isStructReturn = I.getParent()->getParent()->hasStructRetAttr(); + // If this is a struct return function, return the temporary struct. + bool isStructReturn = I.getParent()->getParent()->hasStructRetAttr(); - if (isStructReturn) { - Out << " return StructReturn;\n"; - return; - } + if (isStructReturn) { + Out << " return StructReturn;\n"; + return; + } - // Don't output a void return if this is the last basic block in the function - // unless that would make the basic block empty - if (I.getNumOperands() == 0 && - &*--I.getParent()->getParent()->end() == I.getParent() && - &*I.getParent()->begin() != &I) { - return; - } + // Don't output a void return if this is the last basic block in the function + // unless that would make the basic block empty + if (I.getNumOperands() == 0 && + &*--I.getParent()->getParent()->end() == I.getParent() && + &*I.getParent()->begin() != &I) { + return; + } - Out << " return"; - if (I.getNumOperands()) { - Out << ' '; - writeOperand(I.getOperand(0), ContextCasted); - } - Out << ";\n"; + Out << " return"; + if (I.getNumOperands()) { + Out << ' '; + writeOperand(I.getOperand(0), ContextCasted); + } + Out << ";\n"; } void 
CWriter::visitSwitchInst(SwitchInst &SI) { - Value* Cond = SI.getCondition(); - unsigned NumBits = cast<IntegerType>(Cond->getType())->getBitWidth(); - - if (SI.getNumCases() == 0) { // unconditional branch - printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2); - printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2); - Out << "\n"; - - } else if (NumBits <= 64) { // model as a switch statement - Out << " switch ("; - writeOperand(Cond); - Out << ") {\n default:\n"; - printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2); - printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2); - - - // CHECK: Needs much testing - for (auto Case : SI.cases()) { - ConstantInt* CaseVal = Case.getCaseValue(); - BasicBlock* Succ = Case.getCaseSuccessor(); - Out << " case "; - writeOperand(CaseVal); - Out << ":\n"; - printPHICopiesForSuccessor (SI.getParent(), Succ, 2); - if (isGotoCodeNecessary(SI.getParent(), Succ)) - printBranchToBlock(SI.getParent(), Succ, 2); - else - Out << " break;\n"; - } - Out << " }\n"; - - } else { // model as a series of if statements - Out << " "; - // CHECK: Needs much testing - for (auto Case : SI.cases()) { - Out << "if ("; - ConstantInt* CaseVal = Case.getCaseValue(); - BasicBlock* Succ = Case.getCaseSuccessor(); - ICmpInst *icmp = new ICmpInst(CmpInst::ICMP_EQ, Cond, CaseVal); - visitICmpInst(*icmp); - delete icmp; - Out << ") {\n"; - printPHICopiesForSuccessor (SI.getParent(), Succ, 2); - printBranchToBlock(SI.getParent(), Succ, 2); - Out << " } else "; - } - Out << "{\n"; - printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2); - printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2); - Out << " }\n"; - } - Out << "\n"; + Value *Cond = SI.getCondition(); + unsigned NumBits = cast<IntegerType>(Cond->getType())->getBitWidth(); + + if (SI.getNumCases() == 0) { // unconditional branch + printPHICopiesForSuccessor(SI.getParent(), SI.getDefaultDest(), 2); + printBranchToBlock(SI.getParent(), 
SI.getDefaultDest(), 2); + Out << "\n"; + + } else if (NumBits <= 64) { // model as a switch statement + Out << " switch ("; + writeOperand(Cond); + Out << ") {\n default:\n"; + printPHICopiesForSuccessor(SI.getParent(), SI.getDefaultDest(), 2); + printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2); + + // CHECK: Needs much testing + for (auto Case : SI.cases()) { + ConstantInt *CaseVal = Case.getCaseValue(); + BasicBlock *Succ = Case.getCaseSuccessor(); + Out << " case "; + writeOperand(CaseVal); + Out << ":\n"; + printPHICopiesForSuccessor(SI.getParent(), Succ, 2); + if (isGotoCodeNecessary(SI.getParent(), Succ)) + printBranchToBlock(SI.getParent(), Succ, 2); + else + Out << " break;\n"; + } + Out << " }\n"; + + } else { // model as a series of if statements + Out << " "; + // CHECK: Needs much testing + for (auto Case : SI.cases()) { + Out << "if ("; + ConstantInt *CaseVal = Case.getCaseValue(); + BasicBlock *Succ = Case.getCaseSuccessor(); + ICmpInst *icmp = new ICmpInst(CmpInst::ICMP_EQ, Cond, CaseVal); + visitICmpInst(*icmp); + delete icmp; + Out << ") {\n"; + printPHICopiesForSuccessor(SI.getParent(), Succ, 2); + printBranchToBlock(SI.getParent(), Succ, 2); + Out << " } else "; + } + Out << "{\n"; + printPHICopiesForSuccessor(SI.getParent(), SI.getDefaultDest(), 2); + printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2); + Out << " }\n"; + } + Out << "\n"; } void CWriter::visitIndirectBrInst(IndirectBrInst &IBI) { - Out << " goto *(void*)("; - writeOperand(IBI.getOperand(0)); - Out << ");\n"; + Out << " goto *(void*)("; + writeOperand(IBI.getOperand(0)); + Out << ");\n"; } void CWriter::visitUnreachableInst(UnreachableInst &I) { - Out << " __builtin_unreachable();\n\n"; + Out << " __builtin_unreachable();\n\n"; } bool CWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) { - /// FIXME: This should be reenabled, but loop reordering safe!! - return true; + /// FIXME: This should be reenabled, but loop reordering safe!! 
+ return true; - if (std::next(Function::iterator(From)) != Function::iterator(To)) - return true; // Not the direct successor, we need a goto. + if (std::next(Function::iterator(From)) != Function::iterator(To)) + return true; // Not the direct successor, we need a goto. - //isa<SwitchInst>(From->getTerminator()) + // isa<SwitchInst>(From->getTerminator()) - if (LI->getLoopFor(From) != LI->getLoopFor(To)) - return true; - return false; + if (LI->getLoopFor(From) != LI->getLoopFor(To)) + return true; + return false; } -void CWriter::printPHICopiesForSuccessor (BasicBlock *CurBlock, - BasicBlock *Successor, - unsigned Indent) { - Out << "/* Printing PHIs for " << CurBlock->getName() << "->" << Successor->getName() << " */\n"; - DEBUG(errs() << "/* Printing PHIs for " << CurBlock->getName() << "->" << Successor->getName() << " */\n"); - for (BasicBlock::iterator I = Successor->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - if(LInductionVars.find(PN) == LInductionVars.end()) { - Out << "/* Printing phi node: " << *PN << " */\n"; - DEBUG(errs() << "/* Printing phi node: " << *PN << " */\n"); - // Now we have to do the printing. 
- Value *IV = PN->getIncomingValueForBlock(CurBlock); - if (!isa<UndefValue>(IV) && !isEmptyType(IV->getType())) { - Out << std::string(Indent, ' '); - Out << " " << GetValueName(&*I) << "__PHI_TEMPORARY = "; - writeOperand(IV, ContextCasted); - Out << "; /* for PHI node */\n"; - } - } else { - Out << "/* Skipping (indvar) phi node: " << *PN << " */\n"; - DEBUG(errs() << "/* Skipping (indvar) phi node: " << *PN << " */\n"); - } - } +void CWriter::printPHICopiesForSuccessor(BasicBlock *CurBlock, + BasicBlock *Successor, + unsigned Indent) { + Out << "/* Printing PHIs for " << CurBlock->getName() << "->" + << Successor->getName() << " */\n"; + DEBUG(errs() << "/* Printing PHIs for " << CurBlock->getName() << "->" + << Successor->getName() << " */\n"); + for (BasicBlock::iterator I = Successor->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + if (LInductionVars.find(PN) == LInductionVars.end()) { + Out << "/* Printing phi node: " << *PN << " */\n"; + DEBUG(errs() << "/* Printing phi node: " << *PN << " */\n"); + // Now we have to do the printing. 
+ Value *IV = PN->getIncomingValueForBlock(CurBlock); + if (!isa<UndefValue>(IV) && !isEmptyType(IV->getType())) { + Out << std::string(Indent, ' '); + Out << " " << GetValueName(&*I) << "__PHI_TEMPORARY = "; + writeOperand(IV, ContextCasted); + Out << "; /* for PHI node */\n"; + } + } else { + Out << "/* Skipping (indvar) phi node: " << *PN << " */\n"; + DEBUG(errs() << "/* Skipping (indvar) phi node: " << *PN << " */\n"); + } + } } void CWriter::printBranchToBlock(BasicBlock *CurBB, BasicBlock *Succ, - unsigned Indent) { - if (isGotoCodeNecessary(CurBB, Succ)) { - Out << std::string(Indent, ' ') << " goto "; - writeOperand(Succ); - Out << ";\n"; - } + unsigned Indent) { + if (isGotoCodeNecessary(CurBB, Succ)) { + Out << std::string(Indent, ' ') << " goto "; + writeOperand(Succ); + Out << ";\n"; + } } -void CWriter::printBBorLoop (BasicBlock *BB) { - DEBUG(errs() << "\nPrinting: " << BB->getName() << "\n"); - Out << "\n/* Printing: " << BB->getName() << " */\n"; - if(VisitedBlocks.find(BB)!=VisitedBlocks.end() && ReplicateBlocks.find(BB)==ReplicateBlocks.end()) { - DEBUG(errs() << "This BB has already been printed and is not marked for replication! exiting!\n"); - Out << "/* This BB has already been printed and is not marked for replication! exiting! */\n"; - } else if(!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == BB) { - DEBUG(errs() << "Reached block that is top of stack, return instead!\n"); - Out << "/* " << BB->getName() << " is top of stack, return instead! 
*/\n"; - // ImmPostDommBlocks.pop(); - } else { - VisitedBlocks.insert(BB); - if(Loop *LL = LI->getLoopFor(BB)) { - if (LL->getHeader() == BB) - printLoop(LL); - else - printBasicBlock(BB); - } else { - printBasicBlock(BB); - } - } - +void CWriter::printBBorLoop(BasicBlock *BB) { + DEBUG(errs() << "\nPrinting: " << BB->getName() << "\n"); + Out << "\n/* Printing: " << BB->getName() << " */\n"; + if (VisitedBlocks.find(BB) != VisitedBlocks.end() && + ReplicateBlocks.find(BB) == ReplicateBlocks.end()) { + DEBUG(errs() << "This BB has already been printed and is not marked for " + "replication! exiting!\n"); + Out << "/* This BB has already been printed and is not marked for " + "replication! exiting! */\n"; + } else if (!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == BB) { + DEBUG(errs() << "Reached block that is top of stack, return instead!\n"); + Out << "/* " << BB->getName() << " is top of stack, return instead! */\n"; + // ImmPostDommBlocks.pop(); + } else { + VisitedBlocks.insert(BB); + if (Loop *LL = LI->getLoopFor(BB)) { + if (LL->getHeader() == BB) + printLoop(LL); + else + printBasicBlock(BB); + } else { + printBasicBlock(BB); + } + } } -bool CWriter::compareBlocks(BasicBlock *CurrBlock, BasicBlock *CompBlock, BasicBlock *ImmPostDomm) { - CompVisitedBlocks.insert(CurrBlock); - DEBUG(errs() << "--Comparing " << CurrBlock->getName() << " with " << CompBlock->getName() << "\n"); - if (CurrBlock == ImmPostDomm) { - DEBUG(errs() << "----Reached Post Dominator, returning false!\n"); - return false; - } else if (CurrBlock == CompBlock) { - DEBUG(errs() << "----Found a match! 
" << CurrBlock->getName() << " == " << CompBlock->getName() << "\n"); - return true; - } else { - bool res = false; - for (auto succ: successors(CurrBlock)) { - if (CompVisitedBlocks.find(succ) == CompVisitedBlocks.end()) { - DEBUG(errs() << "----Visiting successor " << succ->getName() << " of " << CurrBlock->getName() << "\n"); - res = res || compareBlocks(succ, CompBlock, ImmPostDomm); - } else { - DEBUG(errs() << "----Skipping successor " << succ->getName() << " of " << CurrBlock->getName() << "\n"); - } - } - return res; - } +bool CWriter::compareBlocks(BasicBlock *CurrBlock, BasicBlock *CompBlock, + BasicBlock *ImmPostDomm) { + CompVisitedBlocks.insert(CurrBlock); + DEBUG(errs() << "--Comparing " << CurrBlock->getName() << " with " + << CompBlock->getName() << "\n"); + if (CurrBlock == ImmPostDomm) { + DEBUG(errs() << "----Reached Post Dominator, returning false!\n"); + return false; + } else if (CurrBlock == CompBlock) { + DEBUG(errs() << "----Found a match! " << CurrBlock->getName() + << " == " << CompBlock->getName() << "\n"); + return true; + } else { + bool res = false; + for (auto succ : successors(CurrBlock)) { + if (CompVisitedBlocks.find(succ) == CompVisitedBlocks.end()) { + DEBUG(errs() << "----Visiting successor " << succ->getName() << " of " + << CurrBlock->getName() << "\n"); + res = res || compareBlocks(succ, CompBlock, ImmPostDomm); + } else { + DEBUG(errs() << "----Skipping successor " << succ->getName() << " of " + << CurrBlock->getName() << "\n"); + } + } + return res; + } } -bool CWriter::findMatch(BasicBlock *CurrBlock, BasicBlock *CompBlock, BasicBlock *ImmPostDomm) { - if (CompBlock == ImmPostDomm) { - DEBUG(errs() << "Reached PostDomm; returning!\n"); - return false; - } - FindVisitedBlocks.insert(CompBlock); - DEBUG(errs() << "Finding match between " << CompBlock->getName() << " & " << CurrBlock->getName() << "\n"); - bool compareResult = compareBlocks(CurrBlock, CompBlock, ImmPostDomm); - CompVisitedBlocks.clear(); - if 
(compareResult){ - DEBUG(errs() << "Match found, marking " << CompBlock->getName() << " for replication!\n"); - // Flag for replication - ReplicateBlocks.insert(CompBlock); - return true; - } else { - bool res = false; - for (auto succ: successors(CompBlock)) { - if(FindVisitedBlocks.find(succ) == FindVisitedBlocks.end()) { - DEBUG(errs() << "Visiting successor " << succ->getName() << " of " << CompBlock->getName() << "\n"); - res = res || findMatch(CurrBlock, succ, ImmPostDomm); - if (res == true) break; - } else { - DEBUG(errs() << "Skipping successor " << succ->getName() << " of " << CompBlock->getName() << "\n"); - } - } - return res; - } +bool CWriter::findMatch(BasicBlock *CurrBlock, BasicBlock *CompBlock, + BasicBlock *ImmPostDomm) { + if (CompBlock == ImmPostDomm) { + DEBUG(errs() << "Reached PostDomm; returning!\n"); + return false; + } + FindVisitedBlocks.insert(CompBlock); + DEBUG(errs() << "Finding match between " << CompBlock->getName() << " & " + << CurrBlock->getName() << "\n"); + bool compareResult = compareBlocks(CurrBlock, CompBlock, ImmPostDomm); + CompVisitedBlocks.clear(); + if (compareResult) { + DEBUG(errs() << "Match found, marking " << CompBlock->getName() + << " for replication!\n"); + // Flag for replication + ReplicateBlocks.insert(CompBlock); + return true; + } else { + bool res = false; + for (auto succ : successors(CompBlock)) { + if (FindVisitedBlocks.find(succ) == FindVisitedBlocks.end()) { + DEBUG(errs() << "Visiting successor " << succ->getName() << " of " + << CompBlock->getName() << "\n"); + res = res || findMatch(CurrBlock, succ, ImmPostDomm); + if (res == true) + break; + } else { + DEBUG(errs() << "Skipping successor " << succ->getName() << " of " + << CompBlock->getName() << "\n"); + } + } + return res; + } } // Branch instruction printing - Avoid printing out a branch to a basic block // that immediately succeeds the current one. 
// void CWriter::visitBranchInst(BranchInst &I) { - errs() << "Visiting Branch Instruction: " << I <<"\n"; - Out << "\n/* Branch: " << I << " */\n"; - - if (I.isConditional()) { - BasicBlock *BB0 = I.getSuccessor(0); - BasicBlock *BB1 = I.getSuccessor(1); - BasicBlock *ImmPostDomm = PDT->findNearestCommonDominator(BB0,BB1); - - // Iterate over all BBs in then & else to find a matching BB - // If found, mark it for replication - if (ImmPostDomm != BB1 && ImmPostDomm != BB0) { - findMatch(BB0, BB1, ImmPostDomm); - FindVisitedBlocks.clear(); - } - if(Loop *L = LI->getLoopFor(I.getParent())) { - if(L == LI->getLoopFor(BB0) && !(L == LI->getLoopFor(BB1))) { - errs() << "This is a loop branch!\n"; - Out << "/* This is a loop branch! */\n"; - //BB0 is in the loop. Print it if it hsn't been printed - if(VisitedBlocks.find(BB0) != VisitedBlocks.end()) { - errs() << "Branching back to header: " << BB0->getName() << "\n"; - errs() << "This is the end of the loop, closing!\n"; - Out << "/* Branching back to header: " << BB0->getName() << " */\n"; - Out << "/* Closing loop! */\n"; - //BB0 is the loop header. CLose the loop then print BB1. - printPHICopiesForSuccessor (I.getParent(), BB0, 2); - Out << " }\n"; - printPHICopiesForSuccessor (I.getParent(), BB1, 2); - printBBorLoop(BB1); - } else { - errs() << "Not branching to header! Branching to: " << BB0->getName() << "\n"; - //BB0 is not the loop header. That means we are entering loop body - - llvm_unreachable("loop branch unhandled!\n"); - } - } else if(L == LI->getLoopFor(BB1) && !(L == LI->getLoopFor(BB0))) { - errs() << "This is a loop branch!\n"; - Out << "/* This is a loop branch! */\n"; - if(VisitedBlocks.find(BB1) != VisitedBlocks.end()) { - errs() << "Branching back to header: " << BB1->getName() << "\n"; - errs() << "This is the end of the loop, closing!\n"; - Out << "/* Branching back to header: " << BB1->getName() << " */\n"; - Out << "/* Closing loop! */\n"; - //BB0 is the loop header. 
CLose the loop then print BB1. - printPHICopiesForSuccessor (I.getParent(), BB1, 2); - Out << " }\n"; - printPHICopiesForSuccessor (I.getParent(), BB0, 2); - printBBorLoop(BB0); - } else { - errs() << "Not branching to header! Branching to: " << BB1->getName() << "\n"; - //BB1 is not the loop header. That means we are entering loop body - llvm_unreachable("loop branch unhandled!\n"); - } - } else { - errs() << "This is a conditional statement within a loop!\n"; - Out << "/* This is a conditional statement within a loop! */\n"; - errs() << ImmPostDomm->getName() << " is the immediate post dominator of " << BB0->getName() << " and " << BB1->getName() << "\n"; - if(VisitedBlocks.find(ImmPostDomm) != VisitedBlocks.end()) { - errs() << "Not pushing " << ImmPostDomm->getName() << " because it has already been visited!\n"; - } else { - errs() << "Pushing " << ImmPostDomm->getName() << " onto stack!\n"; - ImmPostDommBlocks.push(ImmPostDomm); - } - - bool noElse = false; - if(BB1 == ImmPostDomm) { - noElse = true; - } - Out << " if ("; - writeOperand(I.getCondition(), ContextCasted); - Out << ") { /* " << I << "*/\n"; - printPHICopiesForSuccessor (I.getParent(), BB0, 2); - printBBorLoop(BB0); - errs() << "Back to handling " << I.getParent()->getName() << ": " << I << "\n"; - Out << "/* Back to handling " << I.getParent()->getName() << ": " << I << " */\n"; - if (!noElse) { - errs() << "Printing else!\n"; - Out << " } else { /*" << I << "*/\n"; - printPHICopiesForSuccessor (I.getParent(), BB1, 2); - ElseBlocks.push(BB1); - ElseBranches.push(&I); - printBBorLoop(BB1); - errs() << "Back to handling " << I.getParent()->getName() << ": " << I << "\n"; - errs() << "Check to see if else block is closed!\n"; - Out << "/* Back to handling " << I.getParent()->getName() << ": " << I << " */\n" ; - Out << "/* Check to see if else block is closed! 
*/\n" ; - if(!ElseBlocks.empty() && ElseBlocks.top() == BB1) { - errs() << "Else block not closed, need to close braces!\n"; - Out << "/* Else block not closed, need to close braces! */\n" ; - Out << "} /* closing " << *(ElseBranches.top()) << " */\n"; - ElseBranches.pop(); - ElseBlocks.pop(); - } - if(!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == ImmPostDomm) { - errs() << "Will now pop post dom them handle it!\n"; - ImmPostDommBlocks.pop(); - printBBorLoop(ImmPostDomm); - } else { - errs() << "*!*!*!*!*!*!Not sure what is happening here!*!*!*!*!*!*!\n"; - } - } else { - errs() << "No else block. Adding one for phis, then moving to " << BB1->getName() << "!\n"; - Out << "/* (3913) No else block. Adding one for phis, then moving to " << BB1->getName() << "! */\n"; - Out << " } /* closing " << I << "*/\n"; - errs() << "Will now pop post dom them handle it!\n"; - ImmPostDommBlocks.pop(); - Out << "else {\n"; - printPHICopiesForSuccessor (I.getParent(), BB1, 2); - Out << "}\n"; - printBBorLoop(BB1); - } - } - } else { - errs() << "This is a conditional statement!\n"; - errs() << ImmPostDomm->getName() << " is the immediate post dominator of " << BB0->getName() << " and " << BB1->getName() << "\n"; - if(VisitedBlocks.find(ImmPostDomm) != VisitedBlocks.end()) { - errs() << "Not pushing " << ImmPostDomm->getName() << " because it has already been visited!\n"; - } else { - errs() << "Pushing " << ImmPostDomm->getName() << " onto stack!\n"; - ImmPostDommBlocks.push(ImmPostDomm); - } - bool noElse = false; - if(BB1 == ImmPostDomm) { - noElse = true; - } - Out << " if ("; - writeOperand(I.getCondition(), ContextCasted); - Out << ") { /* " << I << "*/\n"; - printPHICopiesForSuccessor (I.getParent(), BB0, 2); - printBBorLoop(BB0); - errs() << "Back to handling " << I.getParent()->getName() << ": " << I << "\n"; - Out << "/* Back to handling " << I.getParent()->getName() << ": " << I << " */\n" ; - if (!noElse) { - errs() << "Printing else!\n"; - Out << "/* Printing 
else! */\n" ; - Out << " } else { /*" << I << "*/\n"; - printPHICopiesForSuccessor (I.getParent(), BB1, 2); - ElseBlocks.push(BB1); - ElseBranches.push(&I); - printBBorLoop(BB1); - errs() << "Back to handling " << I.getParent()->getName() << ": " << I << "\n"; - errs() << "Check to see if else block is closed!\n"; - Out << "/* Back to handling " << I.getParent()->getName() << ": " << I << " */\n"; - Out << "/* Check to see if else block is closed! */\n"; - if(!ElseBlocks.empty() && ElseBlocks.top() == BB1) { - errs() << "Else block not closed, need to close braces!\n"; - Out << "/* Else block not closed, need to close braces! */\n"; - Out << "} /* closing " << *(ElseBranches.top()) << " */\n"; - ElseBranches.pop(); - ElseBlocks.pop(); - } - if(!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == ImmPostDomm) { - errs() << "Will now pop post dom them handle it!\n"; - ImmPostDommBlocks.pop(); - printBBorLoop(ImmPostDomm); - } else { - errs() << "*!*!*!*!*!*!Not sure what is happening here!*!*!*!*!*!*!\n"; - } - } else { - errs() << "No else block. Adding one for phis, then moving to " << BB1->getName() << "!\n"; - Out << "/* (3985) No else block. Adding one for phis, then moving to " << BB1->getName() << "! */\n"; - Out << " } /* closing " << I << "*/\n"; - errs() << "Will now pop post dom them handle it!\n"; - ImmPostDommBlocks.pop(); - Out << "else {\n"; - printPHICopiesForSuccessor (I.getParent(), BB1, 2); - Out << "}\n"; - printBBorLoop(BB1); - } - } - } else { - errs() << "This is an unconditional branch!\n"; - BasicBlock *BB = I.getSuccessor(0); - printPHICopiesForSuccessor (I.getParent(), BB, 2); - if (!ElseBlocks.empty() && I.getParent() == ElseBlocks.top()) { - errs() << "Branch marks end of else block, need to close braces!\n"; - Out << "/* Branch marks end of else block, need to close braces! 
*/\n"; - Out << "} /* closing " << *(ElseBranches.top()) << " */\n"; - ElseBranches.pop(); - ElseBlocks.pop(); - } - printBBorLoop(BB); - } - Out << "\n"; + errs() << "Visiting Branch Instruction: " << I << "\n"; + Out << "\n/* Branch: " << I << " */\n"; + + if (I.isConditional()) { + BasicBlock *BB0 = I.getSuccessor(0); + BasicBlock *BB1 = I.getSuccessor(1); + BasicBlock *ImmPostDomm = PDT->findNearestCommonDominator(BB0, BB1); + + // Iterate over all BBs in then & else to find a matching BB + // If found, mark it for replication + if (ImmPostDomm != BB1 && ImmPostDomm != BB0) { + findMatch(BB0, BB1, ImmPostDomm); + FindVisitedBlocks.clear(); + } + if (Loop *L = LI->getLoopFor(I.getParent())) { + if (L == LI->getLoopFor(BB0) && !(L == LI->getLoopFor(BB1))) { + errs() << "This is a loop branch!\n"; + Out << "/* This is a loop branch! */\n"; + // BB0 is in the loop. Print it if it hsn't been printed + if (VisitedBlocks.find(BB0) != VisitedBlocks.end()) { + errs() << "Branching back to header: " << BB0->getName() << "\n"; + errs() << "This is the end of the loop, closing!\n"; + Out << "/* Branching back to header: " << BB0->getName() << " */\n"; + Out << "/* Closing loop! */\n"; + // BB0 is the loop header. CLose the loop then print BB1. + printPHICopiesForSuccessor(I.getParent(), BB0, 2); + Out << " }\n"; + printPHICopiesForSuccessor(I.getParent(), BB1, 2); + printBBorLoop(BB1); + } else { + errs() << "Not branching to header! Branching to: " << BB0->getName() + << "\n"; + // BB0 is not the loop header. That means we are entering loop body + + llvm_unreachable("loop branch unhandled!\n"); + } + } else if (L == LI->getLoopFor(BB1) && !(L == LI->getLoopFor(BB0))) { + errs() << "This is a loop branch!\n"; + Out << "/* This is a loop branch! 
*/\n"; + if (VisitedBlocks.find(BB1) != VisitedBlocks.end()) { + errs() << "Branching back to header: " << BB1->getName() << "\n"; + errs() << "This is the end of the loop, closing!\n"; + Out << "/* Branching back to header: " << BB1->getName() << " */\n"; + Out << "/* Closing loop! */\n"; + // BB0 is the loop header. CLose the loop then print BB1. + printPHICopiesForSuccessor(I.getParent(), BB1, 2); + Out << " }\n"; + printPHICopiesForSuccessor(I.getParent(), BB0, 2); + printBBorLoop(BB0); + } else { + errs() << "Not branching to header! Branching to: " << BB1->getName() + << "\n"; + // BB1 is not the loop header. That means we are entering loop body + llvm_unreachable("loop branch unhandled!\n"); + } + } else { + errs() << "This is a conditional statement within a loop!\n"; + Out << "/* This is a conditional statement within a loop! */\n"; + errs() << ImmPostDomm->getName() + << " is the immediate post dominator of " << BB0->getName() + << " and " << BB1->getName() << "\n"; + if (VisitedBlocks.find(ImmPostDomm) != VisitedBlocks.end()) { + errs() << "Not pushing " << ImmPostDomm->getName() + << " because it has already been visited!\n"; + } else { + errs() << "Pushing " << ImmPostDomm->getName() << " onto stack!\n"; + ImmPostDommBlocks.push(ImmPostDomm); + } + + bool noElse = false; + if (BB1 == ImmPostDomm) { + noElse = true; + } + Out << " if ("; + writeOperand(I.getCondition(), ContextCasted); + Out << ") { /* " << I << "*/\n"; + printPHICopiesForSuccessor(I.getParent(), BB0, 2); + printBBorLoop(BB0); + errs() << "Back to handling " << I.getParent()->getName() << ": " << I + << "\n"; + Out << "/* Back to handling " << I.getParent()->getName() << ": " << I + << " */\n"; + if (!noElse) { + errs() << "Printing else!\n"; + Out << " } else { /*" << I << "*/\n"; + printPHICopiesForSuccessor(I.getParent(), BB1, 2); + ElseBlocks.push(BB1); + ElseBranches.push(&I); + printBBorLoop(BB1); + errs() << "Back to handling " << I.getParent()->getName() << ": " << I + << "\n"; 
+ errs() << "Check to see if else block is closed!\n"; + Out << "/* Back to handling " << I.getParent()->getName() << ": " << I + << " */\n"; + Out << "/* Check to see if else block is closed! */\n"; + if (!ElseBlocks.empty() && ElseBlocks.top() == BB1) { + errs() << "Else block not closed, need to close braces!\n"; + Out << "/* Else block not closed, need to close braces! */\n"; + Out << "} /* closing " << *(ElseBranches.top()) << " */\n"; + ElseBranches.pop(); + ElseBlocks.pop(); + } + if (!ImmPostDommBlocks.empty() && + ImmPostDommBlocks.top() == ImmPostDomm) { + errs() << "Will now pop post dom them handle it!\n"; + ImmPostDommBlocks.pop(); + printBBorLoop(ImmPostDomm); + } else { + errs() + << "*!*!*!*!*!*!Not sure what is happening here!*!*!*!*!*!*!\n"; + } + } else { + errs() << "No else block. Adding one for phis, then moving to " + << BB1->getName() << "!\n"; + Out << "/* (3913) No else block. Adding one for phis, then moving to " + << BB1->getName() << "! */\n"; + Out << " } /* closing " << I << "*/\n"; + errs() << "Will now pop post dom them handle it!\n"; + ImmPostDommBlocks.pop(); + Out << "else {\n"; + printPHICopiesForSuccessor(I.getParent(), BB1, 2); + Out << "}\n"; + printBBorLoop(BB1); + } + } + } else { + errs() << "This is a conditional statement!\n"; + errs() << ImmPostDomm->getName() << " is the immediate post dominator of " + << BB0->getName() << " and " << BB1->getName() << "\n"; + if (VisitedBlocks.find(ImmPostDomm) != VisitedBlocks.end()) { + errs() << "Not pushing " << ImmPostDomm->getName() + << " because it has already been visited!\n"; + } else { + errs() << "Pushing " << ImmPostDomm->getName() << " onto stack!\n"; + ImmPostDommBlocks.push(ImmPostDomm); + } + bool noElse = false; + if (BB1 == ImmPostDomm) { + noElse = true; + } + Out << " if ("; + writeOperand(I.getCondition(), ContextCasted); + Out << ") { /* " << I << "*/\n"; + printPHICopiesForSuccessor(I.getParent(), BB0, 2); + printBBorLoop(BB0); + errs() << "Back to handling " << 
I.getParent()->getName() << ": " << I + << "\n"; + Out << "/* Back to handling " << I.getParent()->getName() << ": " << I + << " */\n"; + if (!noElse) { + errs() << "Printing else!\n"; + Out << "/* Printing else! */\n"; + Out << " } else { /*" << I << "*/\n"; + printPHICopiesForSuccessor(I.getParent(), BB1, 2); + ElseBlocks.push(BB1); + ElseBranches.push(&I); + printBBorLoop(BB1); + errs() << "Back to handling " << I.getParent()->getName() << ": " << I + << "\n"; + errs() << "Check to see if else block is closed!\n"; + Out << "/* Back to handling " << I.getParent()->getName() << ": " << I + << " */\n"; + Out << "/* Check to see if else block is closed! */\n"; + if (!ElseBlocks.empty() && ElseBlocks.top() == BB1) { + errs() << "Else block not closed, need to close braces!\n"; + Out << "/* Else block not closed, need to close braces! */\n"; + Out << "} /* closing " << *(ElseBranches.top()) << " */\n"; + ElseBranches.pop(); + ElseBlocks.pop(); + } + if (!ImmPostDommBlocks.empty() && + ImmPostDommBlocks.top() == ImmPostDomm) { + errs() << "Will now pop post dom them handle it!\n"; + ImmPostDommBlocks.pop(); + printBBorLoop(ImmPostDomm); + } else { + errs() + << "*!*!*!*!*!*!Not sure what is happening here!*!*!*!*!*!*!\n"; + } + } else { + errs() << "No else block. Adding one for phis, then moving to " + << BB1->getName() << "!\n"; + Out << "/* (3985) No else block. Adding one for phis, then moving to " + << BB1->getName() << "! 
*/\n"; + Out << " } /* closing " << I << "*/\n"; + errs() << "Will now pop post dom them handle it!\n"; + ImmPostDommBlocks.pop(); + Out << "else {\n"; + printPHICopiesForSuccessor(I.getParent(), BB1, 2); + Out << "}\n"; + printBBorLoop(BB1); + } + } + } else { + errs() << "This is an unconditional branch!\n"; + BasicBlock *BB = I.getSuccessor(0); + printPHICopiesForSuccessor(I.getParent(), BB, 2); + if (!ElseBlocks.empty() && I.getParent() == ElseBlocks.top()) { + errs() << "Branch marks end of else block, need to close braces!\n"; + Out << "/* Branch marks end of else block, need to close braces! */\n"; + Out << "} /* closing " << *(ElseBranches.top()) << " */\n"; + ElseBranches.pop(); + ElseBlocks.pop(); + } + printBBorLoop(BB); + } + Out << "\n"; } // PHI nodes get copied into temporary values at the end of predecessor basic // blocks. We now need to copy these temporary values into the REAL value for // the PHI. void CWriter::visitPHINode(PHINode &I) { - if (LInductionVars.find(&I) == LInductionVars.end()) { - writeOperand(&I); - Out << "__PHI_TEMPORARY"; - } - else { - DEBUG(errs() << "Skipping PHI node for induction variable!\n"); - } + if (LInductionVars.find(&I) == LInductionVars.end()) { + writeOperand(&I); + Out << "__PHI_TEMPORARY"; + } else { + DEBUG(errs() << "Skipping PHI node for induction variable!\n"); + } } - // NOTE: Moving LLVM-4 Binary Op functions here bool isNeg(const Value *V) { - if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V)) - if (Bop->getOpcode() == Instruction::Sub) - if (Constant *C = dyn_cast<Constant>(Bop->getOperand(0))) - return C->isNegativeZeroValue(); - return false; + if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V)) + if (Bop->getOpcode() == Instruction::Sub) + if (Constant *C = dyn_cast<Constant>(Bop->getOperand(0))) + return C->isNegativeZeroValue(); + return false; } bool isFNeg(const Value *V, bool IgnoreZeroSign) { - if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V)) - if 
(Bop->getOpcode() == Instruction::FSub) - if (Constant *C = dyn_cast<Constant>(Bop->getOperand(0))) { - if (!IgnoreZeroSign) - IgnoreZeroSign = cast<Instruction>(V)->hasNoSignedZeros(); - return !IgnoreZeroSign ? C->isNegativeZeroValue() : C->isZeroValue(); - } - return false; + if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V)) + if (Bop->getOpcode() == Instruction::FSub) + if (Constant *C = dyn_cast<Constant>(Bop->getOperand(0))) { + if (!IgnoreZeroSign) + IgnoreZeroSign = cast<Instruction>(V)->hasNoSignedZeros(); + return !IgnoreZeroSign ? C->isNegativeZeroValue() : C->isZeroValue(); + } + return false; } - Value *getNegArgument(Value *BinOp) { - return cast<BinaryOperator>(BinOp)->getOperand(1); + return cast<BinaryOperator>(BinOp)->getOperand(1); } const Value *getNegArgument(const Value *BinOp) { - return getNegArgument(const_cast<Value*>(BinOp)); + return getNegArgument(const_cast<Value *>(BinOp)); } Value *getFNegArgument(Value *BinOp) { - return cast<BinaryOperator>(BinOp)->getOperand(1); + return cast<BinaryOperator>(BinOp)->getOperand(1); } const Value *getFNegArgument(const Value *BinOp) { - return getFNegArgument(const_cast<Value*>(BinOp)); + return getFNegArgument(const_cast<Value *>(BinOp)); } static inline bool isConstantAllOnes(const Value *V) { - if (const Constant *C = dyn_cast<Constant>(V)) - return C->isAllOnesValue(); - return false; + if (const Constant *C = dyn_cast<Constant>(V)) + return C->isAllOnesValue(); + return false; } bool isNot(const Value *V) { - if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V)) - return (Bop->getOpcode() == Instruction::Xor && - (isConstantAllOnes(Bop->getOperand(1)) || - isConstantAllOnes(Bop->getOperand(0)))); - return false; + if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V)) + return (Bop->getOpcode() == Instruction::Xor && + (isConstantAllOnes(Bop->getOperand(1)) || + isConstantAllOnes(Bop->getOperand(0)))); + return false; } - Value *getNotArgument(Value *BinOp) { - 
assert(isNot(BinOp) && "getNotArgument on non-'not' instruction!"); - BinaryOperator *BO = cast<BinaryOperator>(BinOp); - Value *Op0 = BO->getOperand(0); - Value *Op1 = BO->getOperand(1); - if (isConstantAllOnes(Op0)) return Op1; - - assert(isConstantAllOnes(Op1)); - return Op0; + assert(isNot(BinOp) && "getNotArgument on non-'not' instruction!"); + BinaryOperator *BO = cast<BinaryOperator>(BinOp); + Value *Op0 = BO->getOperand(0); + Value *Op1 = BO->getOperand(1); + if (isConstantAllOnes(Op0)) + return Op1; + + assert(isConstantAllOnes(Op1)); + return Op0; } const Value *getNotArgument(const Value *BinOp) { - return getNotArgument(const_cast<Value*>(BinOp)); + return getNotArgument(const_cast<Value *>(BinOp)); } +void CWriter::visitBinaryOperator(BinaryOperator &I) { + // binary instructions, shift instructions, setCond instructions. + assert(!I.getType()->isPointerTy()); + DEBUG(errs() << "visiting binary operator!\n"); + + // // We must cast the results of binary operations which might be promoted. 
+ // bool needsCast = false; + // if ((I.getType() == Type::getInt8Ty(I.getContext())) || + // (I.getType() == Type::getInt16Ty(I.getContext())) + // || (I.getType() == Type::getFloatTy(I.getContext()))) { + // // types too small to work with directly + // needsCast = true; + // } else if (I.getType()->getPrimitiveSizeInBits() > 64) { + // // types too big to work with directly + // needsCast = true; + // } + // bool shouldCast; + // bool castIsSigned; + // opcodeNeedsCast(I.getOpcode(), shouldCast, castIsSigned); + // + // if (I.getType()->isVectorTy() || needsCast || shouldCast) { + // + // DEBUG( + // if(needsCast) errs() << "****Needs Cast: \n" << I << "\n"; + // else if(shouldCast) errs() << "****Should Cast: \n" << I << "\n"; + // else if(I.getType()->isVectorTy()) errs() << "****Is Vector Type: \n" + // << I << "\n"; + // ); + // + // Type *VTy = I.getOperand(0)->getType(); + // unsigned opcode; + // if (BinaryOperator::isNeg(&I)) { + // opcode = BinaryNeg; + // Out << "llvm_neg_"; + // printTypeString(Out, VTy, false); + // Out << "("; + // writeOperand(BinaryOperator::getNegArgument(&I), ContextCasted); + // } else if (BinaryOperator::isFNeg(&I)) { + // opcode = BinaryNeg; + // Out << "llvm_neg_"; + // printTypeString(Out, VTy, false); + // Out << "("; + // writeOperand(BinaryOperator::getFNegArgument(&I), ContextCasted); + // } else if (BinaryOperator::isNot(&I)) { + // opcode = BinaryNot; + // Out << "llvm_not_"; + // printTypeString(Out, VTy, false); + // Out << "("; + // writeOperand(BinaryOperator::getNotArgument(&I), ContextCasted); + // } else { + // opcode = I.getOpcode(); + // Out << "llvm_" << Instruction::getOpcodeName(opcode) << "_"; + // printTypeString(Out, VTy, false); + // Out << "("; + // writeOperand(I.getOperand(0), ContextCasted); + // Out << ", "; + // writeOperand(I.getOperand(1), ContextCasted); + // } + // Out << ")"; + // InlineOpDeclTypes.insert(std::pair<unsigned, Type*>(opcode, VTy)); + // return; + // } + // If this is a 
negation operation, print it out as such. For FP, we don't + // want to print "-0.0 - X". + // if (BinaryOperator::isNeg(&I)) { + if (isNeg(&I)) { + Out << "-("; + writeOperand(getNegArgument(&I)); + Out << ")"; + } + // else if (BinaryOperator::isFNeg(&I)) { + else if (isFNeg(&I, true)) { + Out << "-("; + writeOperand(getFNegArgument(&I)); + Out << ")"; + } else if (isNot(&I)) { + Out << "~("; + writeOperand(getNotArgument(&I)); + Out << ")"; + } else if (I.getOpcode() == Instruction::FRem) { + // Output a call to fmod/fmodf instead of emitting a%b + if (I.getType() == Type::getFloatTy(I.getContext())) + Out << "fmodf("; + else if (I.getType() == Type::getDoubleTy(I.getContext())) + Out << "fmod("; + else // all 3 flavors of long double + Out << "fmodl("; + writeOperand(I.getOperand(0), ContextCasted); + Out << ", "; + writeOperand(I.getOperand(1), ContextCasted); + Out << ")"; + } else { + // Write out the cast of the instruction's value back to the proper type + // if necessary. + // bool NeedsClosingParens = writeInstructionCast(I); + // Certain instructions require the operand to be forced to a specific type + // so we use writeOperandWithCast here instead of writeOperand. Similarly + // below for operand 1 + writeOperandWithCast(I.getOperand(0), I.getOpcode()); -void CWriter::visitBinaryOperator(BinaryOperator &I) { - // binary instructions, shift instructions, setCond instructions. - assert(!I.getType()->isPointerTy()); - DEBUG(errs() << "visiting binary operator!\n" ); - - // // We must cast the results of binary operations which might be promoted. 
- // bool needsCast = false; - // if ((I.getType() == Type::getInt8Ty(I.getContext())) || - // (I.getType() == Type::getInt16Ty(I.getContext())) - // || (I.getType() == Type::getFloatTy(I.getContext()))) { - // // types too small to work with directly - // needsCast = true; - // } else if (I.getType()->getPrimitiveSizeInBits() > 64) { - // // types too big to work with directly - // needsCast = true; - // } - // bool shouldCast; - // bool castIsSigned; - // opcodeNeedsCast(I.getOpcode(), shouldCast, castIsSigned); - // - // if (I.getType()->isVectorTy() || needsCast || shouldCast) { - // - // DEBUG( - // if(needsCast) errs() << "****Needs Cast: \n" << I << "\n"; - // else if(shouldCast) errs() << "****Should Cast: \n" << I << "\n"; - // else if(I.getType()->isVectorTy()) errs() << "****Is Vector Type: \n" << I << "\n"; - // ); - // - // Type *VTy = I.getOperand(0)->getType(); - // unsigned opcode; - // if (BinaryOperator::isNeg(&I)) { - // opcode = BinaryNeg; - // Out << "llvm_neg_"; - // printTypeString(Out, VTy, false); - // Out << "("; - // writeOperand(BinaryOperator::getNegArgument(&I), ContextCasted); - // } else if (BinaryOperator::isFNeg(&I)) { - // opcode = BinaryNeg; - // Out << "llvm_neg_"; - // printTypeString(Out, VTy, false); - // Out << "("; - // writeOperand(BinaryOperator::getFNegArgument(&I), ContextCasted); - // } else if (BinaryOperator::isNot(&I)) { - // opcode = BinaryNot; - // Out << "llvm_not_"; - // printTypeString(Out, VTy, false); - // Out << "("; - // writeOperand(BinaryOperator::getNotArgument(&I), ContextCasted); - // } else { - // opcode = I.getOpcode(); - // Out << "llvm_" << Instruction::getOpcodeName(opcode) << "_"; - // printTypeString(Out, VTy, false); - // Out << "("; - // writeOperand(I.getOperand(0), ContextCasted); - // Out << ", "; - // writeOperand(I.getOperand(1), ContextCasted); - // } - // Out << ")"; - // InlineOpDeclTypes.insert(std::pair<unsigned, Type*>(opcode, VTy)); - // return; - // } - - // If this is a negation 
operation, print it out as such. For FP, we don't - // want to print "-0.0 - X". - - //if (BinaryOperator::isNeg(&I)) { - if (isNeg(&I)) { - Out << "-("; - writeOperand(getNegArgument(&I)); - Out << ")"; - } - //else if (BinaryOperator::isFNeg(&I)) { - else if (isFNeg(&I, true)) { - Out << "-("; - writeOperand(getFNegArgument(&I)); - Out << ")"; - } else if (isNot(&I)) { - Out << "~("; - writeOperand(getNotArgument(&I)); - Out << ")"; - } else if (I.getOpcode() == Instruction::FRem) { - // Output a call to fmod/fmodf instead of emitting a%b - if (I.getType() == Type::getFloatTy(I.getContext())) - Out << "fmodf("; - else if (I.getType() == Type::getDoubleTy(I.getContext())) - Out << "fmod("; - else // all 3 flavors of long double - Out << "fmodl("; - writeOperand(I.getOperand(0), ContextCasted); - Out << ", "; - writeOperand(I.getOperand(1), ContextCasted); - Out << ")"; - } else { - - // Write out the cast of the instruction's value back to the proper type - // if necessary. - // bool NeedsClosingParens = writeInstructionCast(I); - - // Certain instructions require the operand to be forced to a specific type - // so we use writeOperandWithCast here instead of writeOperand. 
Similarly - // below for operand 1 - writeOperandWithCast(I.getOperand(0), I.getOpcode()); - - switch (I.getOpcode()) { - case Instruction::Add: - case Instruction::FAdd: Out << " + "; break; - case Instruction::Sub: - case Instruction::FSub: Out << " - "; break; - case Instruction::Mul: - case Instruction::FMul: Out << " * "; break; - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: Out << " % "; break; - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: Out << " / "; break; - case Instruction::And: Out << " & "; break; - case Instruction::Or: Out << " | "; break; - case Instruction::Xor: Out << " ^ "; break; - case Instruction::Shl : Out << " << "; break; - case Instruction::LShr: - case Instruction::AShr: Out << " >> "; break; - default: + switch (I.getOpcode()) { + case Instruction::Add: + case Instruction::FAdd: + Out << " + "; + break; + case Instruction::Sub: + case Instruction::FSub: + Out << " - "; + break; + case Instruction::Mul: + case Instruction::FMul: + Out << " * "; + break; + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + Out << " % "; + break; + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + Out << " / "; + break; + case Instruction::And: + Out << " & "; + break; + case Instruction::Or: + Out << " | "; + break; + case Instruction::Xor: + Out << " ^ "; + break; + case Instruction::Shl: + Out << " << "; + break; + case Instruction::LShr: + case Instruction::AShr: + Out << " >> "; + break; + default: #ifndef NDEBUG - errs() << "Invalid operator type!" << I; + errs() << "Invalid operator type!" 
<< I; #endif - llvm_unreachable(0); - } + llvm_unreachable(0); + } - writeOperandWithCast(I.getOperand(1), I.getOpcode()); - // if (NeedsClosingParens) - // Out << "))"; - } + writeOperandWithCast(I.getOperand(1), I.getOpcode()); + // if (NeedsClosingParens) + // Out << "))"; + } } void CWriter::visitICmpInst(ICmpInst &I) { - if (I.getType()->isVectorTy() - || I.getOperand(0)->getType()->getPrimitiveSizeInBits() > 64) { - Out << "llvm_icmp_" << getCmpPredicateName(I.getPredicate()) << "_"; - printTypeString(Out, I.getOperand(0)->getType(), I.isSigned()); - Out << "("; - writeOperand(I.getOperand(0), ContextCasted); - Out << ", "; - writeOperand(I.getOperand(1), ContextCasted); - Out << ")"; - if (VectorType *VTy = dyn_cast<VectorType>(I.getOperand(0)->getType())) { - CmpDeclTypes.insert(std::pair<CmpInst::Predicate, VectorType*>(I.getPredicate(), VTy)); - TypedefDeclTypes.insert(I.getType()); // insert type not necessarily visible above - } - return; - } + if (I.getType()->isVectorTy() || + I.getOperand(0)->getType()->getPrimitiveSizeInBits() > 64) { + Out << "llvm_icmp_" << getCmpPredicateName(I.getPredicate()) << "_"; + printTypeString(Out, I.getOperand(0)->getType(), I.isSigned()); + Out << "("; + writeOperand(I.getOperand(0), ContextCasted); + Out << ", "; + writeOperand(I.getOperand(1), ContextCasted); + Out << ")"; + if (VectorType *VTy = dyn_cast<VectorType>(I.getOperand(0)->getType())) { + CmpDeclTypes.insert( + std::pair<CmpInst::Predicate, VectorType *>(I.getPredicate(), VTy)); + TypedefDeclTypes.insert( + I.getType()); // insert type not necessarily visible above + } + return; + } - // Write out the cast of the instruction's value back to the proper type - // if necessary. - bool NeedsClosingParens = writeInstructionCast(I); - - // Certain icmp predicate require the operand to be forced to a specific type - // so we use writeOperandWithCast here instead of writeOperand. 
Similarly - // below for operand 1 - writeOperandWithCast(I.getOperand(0), I); - - switch (I.getPredicate()) { - case ICmpInst::ICMP_EQ: Out << " == "; break; - case ICmpInst::ICMP_NE: Out << " != "; break; - case ICmpInst::ICMP_ULE: - case ICmpInst::ICMP_SLE: Out << " <= "; break; - case ICmpInst::ICMP_UGE: - case ICmpInst::ICMP_SGE: Out << " >= "; break; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: Out << " < "; break; - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: Out << " > "; break; - default: + // Write out the cast of the instruction's value back to the proper type + // if necessary. + bool NeedsClosingParens = writeInstructionCast(I); + + // Certain icmp predicate require the operand to be forced to a specific type + // so we use writeOperandWithCast here instead of writeOperand. Similarly + // below for operand 1 + writeOperandWithCast(I.getOperand(0), I); + + switch (I.getPredicate()) { + case ICmpInst::ICMP_EQ: + Out << " == "; + break; + case ICmpInst::ICMP_NE: + Out << " != "; + break; + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_SLE: + Out << " <= "; + break; + case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_SGE: + Out << " >= "; + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: + Out << " < "; + break; + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + Out << " > "; + break; + default: #ifndef NDEBUG - errs() << "Invalid icmp predicate!" << I; + errs() << "Invalid icmp predicate!" 
<< I; #endif - llvm_unreachable(0); - } + llvm_unreachable(0); + } - writeOperandWithCast(I.getOperand(1), I); - if (NeedsClosingParens) - Out << "))"; + writeOperandWithCast(I.getOperand(1), I); + if (NeedsClosingParens) + Out << "))"; } void CWriter::visitFCmpInst(FCmpInst &I) { - if (I.getType()->isVectorTy()) { - Out << "llvm_fcmp_" << getCmpPredicateName(I.getPredicate()) << "_"; - printTypeString(Out, I.getOperand(0)->getType(), I.isSigned()); - Out << "("; - writeOperand(I.getOperand(0), ContextCasted); - Out << ", "; - writeOperand(I.getOperand(1), ContextCasted); - Out << ")"; - if (VectorType *VTy = dyn_cast<VectorType>(I.getOperand(0)->getType())) { - CmpDeclTypes.insert(std::pair<CmpInst::Predicate, VectorType*>(I.getPredicate(), VTy)); - TypedefDeclTypes.insert(I.getType()); // insert type not necessarily visible above - } - return; - } + if (I.getType()->isVectorTy()) { + Out << "llvm_fcmp_" << getCmpPredicateName(I.getPredicate()) << "_"; + printTypeString(Out, I.getOperand(0)->getType(), I.isSigned()); + Out << "("; + writeOperand(I.getOperand(0), ContextCasted); + Out << ", "; + writeOperand(I.getOperand(1), ContextCasted); + Out << ")"; + if (VectorType *VTy = dyn_cast<VectorType>(I.getOperand(0)->getType())) { + CmpDeclTypes.insert( + std::pair<CmpInst::Predicate, VectorType *>(I.getPredicate(), VTy)); + TypedefDeclTypes.insert( + I.getType()); // insert type not necessarily visible above + } + return; + } - Out << "llvm_fcmp_" << getCmpPredicateName(I.getPredicate()) << "("; - // Write the first operand - writeOperand(I.getOperand(0), ContextCasted); - Out << ", "; - // Write the second operand - writeOperand(I.getOperand(1), ContextCasted); - Out << ")"; + Out << "llvm_fcmp_" << getCmpPredicateName(I.getPredicate()) << "("; + // Write the first operand + writeOperand(I.getOperand(0), ContextCasted); + Out << ", "; + // Write the second operand + writeOperand(I.getOperand(1), ContextCasted); + Out << ")"; } -static const char * 
getFloatBitCastField(Type *Ty) { - switch (Ty->getTypeID()) { - default: llvm_unreachable("Invalid Type"); - case Type::FloatTyID: return "Float"; - case Type::DoubleTyID: return "Double"; - case Type::IntegerTyID: { - unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); - if (NumBits <= 32) - return "Int32"; - else - return "Int64"; - } - } +static const char *getFloatBitCastField(Type *Ty) { + switch (Ty->getTypeID()) { + default: + llvm_unreachable("Invalid Type"); + case Type::FloatTyID: + return "Float"; + case Type::DoubleTyID: + return "Double"; + case Type::IntegerTyID: { + unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); + if (NumBits <= 32) + return "Int32"; + else + return "Int64"; + } + } } void CWriter::visitCastInst(CastInst &I) { - DEBUG(errs() << "This is a cast instruction!\n"); - Type *DstTy = I.getType(); - Type *SrcTy = I.getOperand(0)->getType(); - - if (DstTy->isVectorTy() || SrcTy->isVectorTy() - || DstTy->getPrimitiveSizeInBits() > 64 - || SrcTy->getPrimitiveSizeInBits() > 64) { - Out << "llvm_" << I.getOpcodeName() << "_"; - printTypeString(Out, SrcTy, false); - Out << "_"; - printTypeString(Out, DstTy, false); - Out << "("; - writeOperand(I.getOperand(0), ContextCasted); - Out << ")"; - CastOpDeclTypes.insert(std::pair<Instruction::CastOps, std::pair<Type*, Type*> >(I.getOpcode(), std::pair<Type*, Type*>(SrcTy, DstTy))); - return; - } + DEBUG(errs() << "This is a cast instruction!\n"); + Type *DstTy = I.getType(); + Type *SrcTy = I.getOperand(0)->getType(); - if (isFPIntBitCast(I)) { - Out << '('; - // These int<->float and long<->double casts need to be handled specially - Out << GetValueName(&I) << "__BITCAST_TEMPORARY." - << getFloatBitCastField(I.getOperand(0)->getType()) << " = "; - writeOperand(I.getOperand(0), ContextCasted); - Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY." 
- << getFloatBitCastField(I.getType()); - Out << ')'; - return; - } + if (DstTy->isVectorTy() || SrcTy->isVectorTy() || + DstTy->getPrimitiveSizeInBits() > 64 || + SrcTy->getPrimitiveSizeInBits() > 64) { + Out << "llvm_" << I.getOpcodeName() << "_"; + printTypeString(Out, SrcTy, false); + Out << "_"; + printTypeString(Out, DstTy, false); + Out << "("; + writeOperand(I.getOperand(0), ContextCasted); + Out << ")"; + CastOpDeclTypes.insert( + std::pair<Instruction::CastOps, std::pair<Type *, Type *>>( + I.getOpcode(), std::pair<Type *, Type *>(SrcTy, DstTy))); + return; + } + + if (isFPIntBitCast(I)) { + Out << '('; + // These int<->float and long<->double casts need to be handled specially + Out << GetValueName(&I) << "__BITCAST_TEMPORARY." + << getFloatBitCastField(I.getOperand(0)->getType()) << " = "; + writeOperand(I.getOperand(0), ContextCasted); + Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY." + << getFloatBitCastField(I.getType()); + Out << ')'; + return; + } - Out << '('; - printCast(I.getOpcode(), SrcTy, DstTy); + Out << '('; + printCast(I.getOpcode(), SrcTy, DstTy); - // Make a sext from i1 work by subtracting the i1 from 0 (an int). - if (SrcTy == Type::getInt1Ty(I.getContext()) && - I.getOpcode() == Instruction::SExt) - Out << "0-"; + // Make a sext from i1 work by subtracting the i1 from 0 (an int). 
+ if (SrcTy == Type::getInt1Ty(I.getContext()) && + I.getOpcode() == Instruction::SExt) + Out << "0-"; - writeOperand(I.getOperand(0), ContextCasted); + writeOperand(I.getOperand(0), ContextCasted); - if (DstTy == Type::getInt1Ty(I.getContext()) && - (I.getOpcode() == Instruction::Trunc || - I.getOpcode() == Instruction::FPToUI || - I.getOpcode() == Instruction::FPToSI || - I.getOpcode() == Instruction::PtrToInt)) { - // Make sure we really get a trunc to bool by anding the operand with 1 - Out << "&1u"; - } - Out << ')'; + if (DstTy == Type::getInt1Ty(I.getContext()) && + (I.getOpcode() == Instruction::Trunc || + I.getOpcode() == Instruction::FPToUI || + I.getOpcode() == Instruction::FPToSI || + I.getOpcode() == Instruction::PtrToInt)) { + // Make sure we really get a trunc to bool by anding the operand with 1 + Out << "&1u"; + } + Out << ')'; } void CWriter::visitSelectInst(SelectInst &I) { - Out << "llvm_select_"; - printTypeString(Out, I.getType(), false); - Out << "("; - writeOperand(I.getCondition(), ContextCasted); - Out << ", "; - writeOperand(I.getTrueValue(), ContextCasted); - Out << ", "; - writeOperand(I.getFalseValue(), ContextCasted); - Out << ")"; - SelectDeclTypes.insert(I.getType()); - assert(I.getCondition()->getType()->isVectorTy() == I.getType()->isVectorTy()); // TODO: might be scalarty == vectorty + Out << "llvm_select_"; + printTypeString(Out, I.getType(), false); + Out << "("; + writeOperand(I.getCondition(), ContextCasted); + Out << ", "; + writeOperand(I.getTrueValue(), ContextCasted); + Out << ", "; + writeOperand(I.getFalseValue(), ContextCasted); + Out << ")"; + SelectDeclTypes.insert(I.getType()); + assert(I.getCondition()->getType()->isVectorTy() == + I.getType()->isVectorTy()); // TODO: might be scalarty == vectorty } // Returns the macro name or value of the max or min of an integer type // (as defined in limits.h). 
static void printLimitValue(IntegerType &Ty, bool isSigned, bool isMax, - raw_ostream &Out) { - const char* type; - const char* sprefix = ""; - - unsigned NumBits = Ty.getBitWidth(); - if (NumBits <= 8) { - type = "CHAR"; - sprefix = "S"; - } else if (NumBits <= 16) { - type = "SHRT"; - } else if (NumBits <= 32) { - type = "INT"; - } else if (NumBits <= 64) { - type = "LLONG"; - } else { - llvm_unreachable("Bit widths > 64 not implemented yet"); - } + raw_ostream &Out) { + const char *type; + const char *sprefix = ""; + + unsigned NumBits = Ty.getBitWidth(); + if (NumBits <= 8) { + type = "CHAR"; + sprefix = "S"; + } else if (NumBits <= 16) { + type = "SHRT"; + } else if (NumBits <= 32) { + type = "INT"; + } else if (NumBits <= 64) { + type = "LLONG"; + } else { + llvm_unreachable("Bit widths > 64 not implemented yet"); + } - if (isSigned) - Out << sprefix << type << (isMax ? "_MAX" : "_MIN"); - else - Out << "U" << type << (isMax ? "_MAX" : "0"); + if (isSigned) + Out << sprefix << type << (isMax ? "_MAX" : "_MIN"); + else + Out << "U" << type << (isMax ? 
"_MAX" : "0"); } #ifndef NDEBUG static bool isSupportedIntegerSize(IntegerType &T) { - return T.getBitWidth() == 8 || T.getBitWidth() == 16 || - T.getBitWidth() == 32 || T.getBitWidth() == 64 || - T.getBitWidth() == 128; + return T.getBitWidth() == 8 || T.getBitWidth() == 16 || + T.getBitWidth() == 32 || T.getBitWidth() == 64 || + T.getBitWidth() == 128; } #endif -void CWriter::printIntrinsicDefinition(FunctionType *funT, - unsigned Opcode, std::string OpName, raw_ostream &Out) { - Type *retT = funT->getReturnType(); - Type *elemT = funT->getParamType(0); - IntegerType *elemIntT = dyn_cast<IntegerType>(elemT); - char i, numParams = funT->getNumParams(); - bool isSigned; - switch (Opcode) { - default: - isSigned = false; - break; - case Intrinsic::sadd_with_overflow: - case Intrinsic::ssub_with_overflow: - case Intrinsic::smul_with_overflow: - isSigned = true; - break; - } - assert(numParams > 0 && numParams < 26); +void CWriter::printIntrinsicDefinition(FunctionType *funT, unsigned Opcode, + std::string OpName, raw_ostream &Out) { + Type *retT = funT->getReturnType(); + Type *elemT = funT->getParamType(0); + IntegerType *elemIntT = dyn_cast<IntegerType>(elemT); + char i, numParams = funT->getNumParams(); + bool isSigned; + switch (Opcode) { + default: + isSigned = false; + break; + case Intrinsic::sadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::smul_with_overflow: + isSigned = true; + break; + } + assert(numParams > 0 && numParams < 26); - if (isa<VectorType>(retT)) { - // this looks general, but is only actually used for ctpop, ctlz, cttz - Type* *devecFunParams = (Type**)alloca(sizeof(Type*) * numParams); - for (i = 0; i < numParams; i++) { - devecFunParams[(int)i] = funT->params()[(int)i]->getScalarType(); - } - FunctionType *devecFunT = FunctionType::get(funT->getReturnType()->getScalarType(), - makeArrayRef(devecFunParams, numParams), funT->isVarArg()); - printIntrinsicDefinition(devecFunT, Opcode, OpName + "_devec", Out); - } + if 
(isa<VectorType>(retT)) { + // this looks general, but is only actually used for ctpop, ctlz, cttz + Type **devecFunParams = (Type **)alloca(sizeof(Type *) * numParams); + for (i = 0; i < numParams; i++) { + devecFunParams[(int)i] = funT->params()[(int)i]->getScalarType(); + } + FunctionType *devecFunT = FunctionType::get( + funT->getReturnType()->getScalarType(), + makeArrayRef(devecFunParams, numParams), funT->isVarArg()); + printIntrinsicDefinition(devecFunT, Opcode, OpName + "_devec", Out); + } - // static __forceinline Rty _llvm_op_ixx(unsigned ixx a, unsigned ixx b) { - // Rty r; - // <opcode here> - // return r; - // } - Out << "static __forceinline "; - printTypeName(Out, retT); - Out << " "; - Out << OpName; - Out << "("; - for (i = 0; i < numParams; i++) { - switch (Opcode) { - // optional intrinsic validity assertion checks - default: - // default case: assume all parameters must have the same type - assert(elemT == funT->getParamType(i)); - break; - case Intrinsic::ctlz: - case Intrinsic::cttz: - case Intrinsic::powi: - break; - } - printTypeNameUnaligned(Out, funT->getParamType(i), isSigned); - Out << " " << (char)('a' + i); - if (i != numParams - 1) Out << ", "; - } - Out << ") {\n "; - printTypeName(Out, retT); - Out << " r;\n"; - - if (isa<VectorType>(retT)) { - for (i = 0; i < numParams; i++) { - Out << " r.vector[" << (int)i << "] = " << OpName << "_devec("; - for (char j = 0; j < numParams; j++) { - Out << (char)('a' + j); - if (isa<VectorType>(funT->params()[j])) - Out << ".vector[" << (int)i << "]"; - if (j != numParams - 1) Out << ", "; - } - Out << ");\n"; - } - } - else if (elemIntT) { - // handle integer ops - assert(isSupportedIntegerSize(*elemIntT) && - "CBackend does not support arbitrary size integers."); - switch (Opcode) { - default: + // static __forceinline Rty _llvm_op_ixx(unsigned ixx a, unsigned ixx b) { + // Rty r; + // <opcode here> + // return r; + // } + Out << "static __forceinline "; + printTypeName(Out, retT); + Out << " 
"; + Out << OpName; + Out << "("; + for (i = 0; i < numParams; i++) { + switch (Opcode) { + // optional intrinsic validity assertion checks + default: + // default case: assume all parameters must have the same type + assert(elemT == funT->getParamType(i)); + break; + case Intrinsic::ctlz: + case Intrinsic::cttz: + case Intrinsic::powi: + break; + } + printTypeNameUnaligned(Out, funT->getParamType(i), isSigned); + Out << " " << (char)('a' + i); + if (i != numParams - 1) + Out << ", "; + } + Out << ") {\n "; + printTypeName(Out, retT); + Out << " r;\n"; + + if (isa<VectorType>(retT)) { + for (i = 0; i < numParams; i++) { + Out << " r.vector[" << (int)i << "] = " << OpName << "_devec("; + for (char j = 0; j < numParams; j++) { + Out << (char)('a' + j); + if (isa<VectorType>(funT->params()[j])) + Out << ".vector[" << (int)i << "]"; + if (j != numParams - 1) + Out << ", "; + } + Out << ");\n"; + } + } else if (elemIntT) { + // handle integer ops + assert(isSupportedIntegerSize(*elemIntT) && + "CBackend does not support arbitrary size integers."); + switch (Opcode) { + default: #ifndef NDEBUG - errs() << "Unsupported Intrinsic!" << Opcode; + errs() << "Unsupported Intrinsic!" << Opcode; #endif - llvm_unreachable(0); - - case Intrinsic::uadd_with_overflow: - // r.field0 = a + b; - // r.field1 = (r.field0 < a); - assert(cast<StructType>(retT)->getElementType(0) == elemT); - Out << " r.field0 = a + b;\n"; - Out << " r.field1 = (a >= -b);\n"; - break; - - case Intrinsic::sadd_with_overflow: - // r.field0 = a + b; - // r.field1 = (b > 0 && a > XX_MAX - b) || - // (b < 0 && a < XX_MIN - b); - assert(cast<StructType>(retT)->getElementType(0) == elemT); - Out << " r.field0 = a + b;\n"; - Out << " r.field1 = (b >= 0 ? 
a > "; - printLimitValue(*elemIntT, true, true, Out); - Out << " - b : a < "; - printLimitValue(*elemIntT, true, false, Out); - Out << " - b);\n"; - break; - - case Intrinsic::usub_with_overflow: - assert(cast<StructType>(retT)->getElementType(0) == elemT); - Out << " r.field0 = a - b;\n"; - Out << " r.field1 = (a < b);\n"; - break; - - case Intrinsic::ssub_with_overflow: - assert(cast<StructType>(retT)->getElementType(0) == elemT); - Out << " r.field0 = a - b;\n"; - Out << " r.field1 = (b <= 0 ? a > "; - printLimitValue(*elemIntT, true, true, Out); - Out << " + b : a < "; - printLimitValue(*elemIntT, true, false, Out); - Out << " + b);\n"; - break; - - case Intrinsic::umul_with_overflow: - assert(cast<StructType>(retT)->getElementType(0) == elemT); - Out << " r.field1 = LLVMMul_uov(8 * sizeof(a), &a, &b, &r.field0);\n"; - break; - - case Intrinsic::smul_with_overflow: - assert(cast<StructType>(retT)->getElementType(0) == elemT); - Out << " r.field1 = LLVMMul_sov(8 * sizeof(a), &a, &b, &r.field0);\n"; - break; - - case Intrinsic::bswap: - assert(retT == elemT); - Out << " LLVMFlipAllBits(8 * sizeof(a), &a, &r);\n"; - break; - - case Intrinsic::ctpop: - assert(retT == elemT); - Out << " r = "; - if (retT->getPrimitiveSizeInBits() > 64) - Out << "llvm_ctor_u128(0, "; - Out << "LLVMCountPopulation(8 * sizeof(a), &a)"; - if (retT->getPrimitiveSizeInBits() > 64) - Out << ")"; - Out << ";\n"; - break; - - case Intrinsic::ctlz: - assert(retT == elemT); - Out << " (void)b;\n r = "; - if (retT->getPrimitiveSizeInBits() > 64) - Out << "llvm_ctor_u128(0, "; - Out << "LLVMCountLeadingZeros(8 * sizeof(a), &a)"; - if (retT->getPrimitiveSizeInBits() > 64) - Out << ")"; - Out << ";\n"; - break; - - case Intrinsic::cttz: - assert(retT == elemT); - Out << " (void)b;\n r = "; - if (retT->getPrimitiveSizeInBits() > 64) - Out << "llvm_ctor_u128(0, "; - Out << "LLVMCountTrailingZeros(8 * sizeof(a), &a)"; - if (retT->getPrimitiveSizeInBits() > 64) - Out << ")"; - Out << ";\n"; - break; - 
} + llvm_unreachable(0); + + case Intrinsic::uadd_with_overflow: + // r.field0 = a + b; + // r.field1 = (r.field0 < a); + assert(cast<StructType>(retT)->getElementType(0) == elemT); + Out << " r.field0 = a + b;\n"; + Out << " r.field1 = (a >= -b);\n"; + break; - } else { - // handle FP ops - const char *suffix; - assert(retT == elemT); - if (elemT->isFloatTy() || elemT->isHalfTy()) { - suffix = "f"; - } else if (elemT->isDoubleTy()) { - suffix = ""; - } else if (elemT->isFP128Ty()) { - } else if (elemT->isX86_FP80Ty()) { - } else if (elemT->isPPC_FP128Ty()) { - suffix = "l"; - } else { + case Intrinsic::sadd_with_overflow: + // r.field0 = a + b; + // r.field1 = (b > 0 && a > XX_MAX - b) || + // (b < 0 && a < XX_MIN - b); + assert(cast<StructType>(retT)->getElementType(0) == elemT); + Out << " r.field0 = a + b;\n"; + Out << " r.field1 = (b >= 0 ? a > "; + printLimitValue(*elemIntT, true, true, Out); + Out << " - b : a < "; + printLimitValue(*elemIntT, true, false, Out); + Out << " - b);\n"; + break; + + case Intrinsic::usub_with_overflow: + assert(cast<StructType>(retT)->getElementType(0) == elemT); + Out << " r.field0 = a - b;\n"; + Out << " r.field1 = (a < b);\n"; + break; + + case Intrinsic::ssub_with_overflow: + assert(cast<StructType>(retT)->getElementType(0) == elemT); + Out << " r.field0 = a - b;\n"; + Out << " r.field1 = (b <= 0 ? 
a > "; + printLimitValue(*elemIntT, true, true, Out); + Out << " + b : a < "; + printLimitValue(*elemIntT, true, false, Out); + Out << " + b);\n"; + break; + + case Intrinsic::umul_with_overflow: + assert(cast<StructType>(retT)->getElementType(0) == elemT); + Out << " r.field1 = LLVMMul_uov(8 * sizeof(a), &a, &b, &r.field0);\n"; + break; + + case Intrinsic::smul_with_overflow: + assert(cast<StructType>(retT)->getElementType(0) == elemT); + Out << " r.field1 = LLVMMul_sov(8 * sizeof(a), &a, &b, &r.field0);\n"; + break; + + case Intrinsic::bswap: + assert(retT == elemT); + Out << " LLVMFlipAllBits(8 * sizeof(a), &a, &r);\n"; + break; + + case Intrinsic::ctpop: + assert(retT == elemT); + Out << " r = "; + if (retT->getPrimitiveSizeInBits() > 64) + Out << "llvm_ctor_u128(0, "; + Out << "LLVMCountPopulation(8 * sizeof(a), &a)"; + if (retT->getPrimitiveSizeInBits() > 64) + Out << ")"; + Out << ";\n"; + break; + + case Intrinsic::ctlz: + assert(retT == elemT); + Out << " (void)b;\n r = "; + if (retT->getPrimitiveSizeInBits() > 64) + Out << "llvm_ctor_u128(0, "; + Out << "LLVMCountLeadingZeros(8 * sizeof(a), &a)"; + if (retT->getPrimitiveSizeInBits() > 64) + Out << ")"; + Out << ";\n"; + break; + + case Intrinsic::cttz: + assert(retT == elemT); + Out << " (void)b;\n r = "; + if (retT->getPrimitiveSizeInBits() > 64) + Out << "llvm_ctor_u128(0, "; + Out << "LLVMCountTrailingZeros(8 * sizeof(a), &a)"; + if (retT->getPrimitiveSizeInBits() > 64) + Out << ")"; + Out << ";\n"; + break; + } + + } else { + // handle FP ops + const char *suffix; + assert(retT == elemT); + if (elemT->isFloatTy() || elemT->isHalfTy()) { + suffix = "f"; + } else if (elemT->isDoubleTy()) { + suffix = ""; + } else if (elemT->isFP128Ty()) { + } else if (elemT->isX86_FP80Ty()) { + } else if (elemT->isPPC_FP128Ty()) { + suffix = "l"; + } else { #ifndef NDEBUG - errs() << "Unsupported Intrinsic!" << Opcode; + errs() << "Unsupported Intrinsic!" 
<< Opcode; #endif - llvm_unreachable(0); - } + llvm_unreachable(0); + } - switch (Opcode) { - default: + switch (Opcode) { + default: #ifndef NDEBUG - errs() << "Unsupported Intrinsic!" << Opcode; + errs() << "Unsupported Intrinsic!" << Opcode; #endif - llvm_unreachable(0); + llvm_unreachable(0); - case Intrinsic::ceil: - Out << " r = ceil" << suffix << "(a);\n"; - break; - - case Intrinsic::fabs: - Out << " r = fabs" << suffix << "(a);\n"; - break; + case Intrinsic::ceil: + Out << " r = ceil" << suffix << "(a);\n"; + break; - case Intrinsic::floor: - Out << " r = floor" << suffix << "(a);\n"; - break; + case Intrinsic::fabs: + Out << " r = fabs" << suffix << "(a);\n"; + break; - case Intrinsic::fma: - Out << " r = fma" << suffix << "(a, b, c);\n"; - break; + case Intrinsic::floor: + Out << " r = floor" << suffix << "(a);\n"; + break; - case Intrinsic::fmuladd: - Out << " r = a * b + c;\n"; - break; + case Intrinsic::fma: + Out << " r = fma" << suffix << "(a, b, c);\n"; + break; - case Intrinsic::pow: - case Intrinsic::powi: - Out << " r = pow" << suffix << "(a, b);\n"; - break; + case Intrinsic::fmuladd: + Out << " r = a * b + c;\n"; + break; - case Intrinsic::rint: - Out << " r = rint" << suffix << "(a);\n"; - break; + case Intrinsic::pow: + case Intrinsic::powi: + Out << " r = pow" << suffix << "(a, b);\n"; + break; - case Intrinsic::sqrt: - Out << " r = sqrt" << "(a);\n"; - break; + case Intrinsic::rint: + Out << " r = rint" << suffix << "(a);\n"; + break; - case Intrinsic::trunc: - Out << " r = trunc" << suffix << "(a);\n"; - break; + case Intrinsic::sqrt: + Out << " r = sqrt" + << "(a);\n"; + break; - } - } + case Intrinsic::trunc: + Out << " r = trunc" << suffix << "(a);\n"; + break; + } + } - Out << " return r;\n}\n"; + Out << " return r;\n}\n"; } void CWriter::printIntrinsicDefinition(Function &F, raw_ostream &Out) { - FunctionType *funT = F.getFunctionType(); - unsigned Opcode = F.getIntrinsicID(); - std::string OpName = GetValueName(&F); - 
printIntrinsicDefinition(funT, Opcode, OpName, Out); + FunctionType *funT = F.getFunctionType(); + unsigned Opcode = F.getIntrinsicID(); + std::string OpName = GetValueName(&F); + printIntrinsicDefinition(funT, Opcode, OpName, Out); } void CWriter::lowerIntrinsics(Function &F) { - // Examine all the instructions in this function to find the intrinsics that - // need to be lowered. - for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) - if (CallInst *CI = dyn_cast<CallInst>(I++)) - if (Function *F = CI->getCalledFunction()) - switch (F->getIntrinsicID()) { - case Intrinsic::not_intrinsic: - case Intrinsic::vastart: - case Intrinsic::vacopy: - case Intrinsic::vaend: - case Intrinsic::returnaddress: - case Intrinsic::frameaddress: - case Intrinsic::setjmp: - case Intrinsic::longjmp: - case Intrinsic::sigsetjmp: - case Intrinsic::siglongjmp: - case Intrinsic::prefetch: - case Intrinsic::x86_sse_cmp_ss: - case Intrinsic::x86_sse_cmp_ps: - case Intrinsic::x86_sse2_cmp_sd: - case Intrinsic::x86_sse2_cmp_pd: - case Intrinsic::ppc_altivec_lvsl: - case Intrinsic::uadd_with_overflow: - case Intrinsic::sadd_with_overflow: - case Intrinsic::usub_with_overflow: - case Intrinsic::ssub_with_overflow: - case Intrinsic::umul_with_overflow: - case Intrinsic::smul_with_overflow: - case Intrinsic::bswap: - case Intrinsic::ceil: - case Intrinsic::ctlz: - case Intrinsic::ctpop: - case Intrinsic::cttz: - case Intrinsic::fabs: - case Intrinsic::floor: - case Intrinsic::fma: - case Intrinsic::fmuladd: - case Intrinsic::pow: - case Intrinsic::powi: - case Intrinsic::rint: - case Intrinsic::sqrt: - case Intrinsic::trunc: - case Intrinsic::trap: - case Intrinsic::stackprotector: - case Intrinsic::dbg_value: - case Intrinsic::dbg_declare: - // We directly implement these intrinsics - break; - default: - // All other intrinsic calls we must lower. 
- BasicBlock::iterator Before = E; - if (CI != &BB->front()) - Before = std::prev(BasicBlock::iterator(CI)); - - IL->LowerIntrinsicCall(CI); - if (Before != E) { // Move iterator to instruction after call - I = Before; ++I; - } else { - I = BB->begin(); - } - // If the intrinsic got lowered to another call, and that call has - // a definition then we need to make sure its prototype is emitted - // before any calls to it. - if (CallInst *Call = dyn_cast<CallInst>(I)) - if (Function *NewF = Call->getCalledFunction()) - if (!NewF->isDeclaration()) - prototypesToGen.push_back(NewF); - - break; - } + // Examine all the instructions in this function to find the intrinsics that + // need to be lowered. + for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) + if (CallInst *CI = dyn_cast<CallInst>(I++)) + if (Function *F = CI->getCalledFunction()) + switch (F->getIntrinsicID()) { + case Intrinsic::not_intrinsic: + case Intrinsic::vastart: + case Intrinsic::vacopy: + case Intrinsic::vaend: + case Intrinsic::returnaddress: + case Intrinsic::frameaddress: + case Intrinsic::setjmp: + case Intrinsic::longjmp: + case Intrinsic::sigsetjmp: + case Intrinsic::siglongjmp: + case Intrinsic::prefetch: + case Intrinsic::x86_sse_cmp_ss: + case Intrinsic::x86_sse_cmp_ps: + case Intrinsic::x86_sse2_cmp_sd: + case Intrinsic::x86_sse2_cmp_pd: + case Intrinsic::ppc_altivec_lvsl: + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::bswap: + case Intrinsic::ceil: + case Intrinsic::ctlz: + case Intrinsic::ctpop: + case Intrinsic::cttz: + case Intrinsic::fabs: + case Intrinsic::floor: + case Intrinsic::fma: + case Intrinsic::fmuladd: + case Intrinsic::pow: + case Intrinsic::powi: + case Intrinsic::rint: + case 
Intrinsic::sqrt: + case Intrinsic::trunc: + case Intrinsic::trap: + case Intrinsic::stackprotector: + case Intrinsic::dbg_value: + case Intrinsic::dbg_declare: + // We directly implement these intrinsics + break; + default: + // All other intrinsic calls we must lower. + BasicBlock::iterator Before = E; + if (CI != &BB->front()) + Before = std::prev(BasicBlock::iterator(CI)); + + IL->LowerIntrinsicCall(CI); + if (Before != E) { // Move iterator to instruction after call + I = Before; + ++I; + } else { + I = BB->begin(); + } + // If the intrinsic got lowered to another call, and that call has + // a definition then we need to make sure its prototype is emitted + // before any calls to it. + if (CallInst *Call = dyn_cast<CallInst>(I)) + if (Function *NewF = Call->getCalledFunction()) + if (!NewF->isDeclaration()) + prototypesToGen.push_back(NewF); + + break; + } } void CWriter::visitCallInst(CallInst &I) { - if (isa<InlineAsm>(I.getCalledValue())) - return visitInlineAsm(I); - - // Handle intrinsic function calls first... - if (Function *F = I.getCalledFunction()) - if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) - if (visitBuiltinCall(I, ID)) - return; - - Value *Callee = I.getCalledValue(); - - PointerType *PTy = cast<PointerType>(Callee->getType()); - FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); - - // If this is a call to a struct-return function, assign to the first - // parameter instead of passing it to the call. - - // CHECK: If AttributeList replaces AttributeSet for CallInst - const AttributeList PAL = I.getAttributes(); - bool hasByVal = I.hasByValArgument(); - bool isStructRet = I.hasStructRetAttr(); - if (isStructRet) { - writeOperandDeref(I.getArgOperand(0)); - Out << " = "; - } + if (isa<InlineAsm>(I.getCalledValue())) + return visitInlineAsm(I); - if (I.isTailCall()) Out << " /*tail*/ "; - - // If this is an indirect call to a struct return function, we need to cast - // the pointer. 
Ditto for indirect calls with byval arguments. - bool NeedsCast = (hasByVal || isStructRet || I.getCallingConv() != CallingConv::C) && !isa<Function>(Callee); - - // GCC is a real PITA. It does not permit codegening casts of functions to - // function pointers if they are in a call (it generates a trap instruction - // instead!). We work around this by inserting a cast to void* in between - // the function and the function pointer cast. Unfortunately, we can't just - // form the constant expression here, because the folder will immediately - // nuke it. - // - // Note finally, that this is completely unsafe. ANSI C does not guarantee - // that void* and function pointers have the same size. :( To deal with this - // in the common case, we handle casts where the number of arguments passed - // match exactly. - // - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Callee)) - if (CE->isCast()) - if (Function *RF = dyn_cast<Function>(CE->getOperand(0))) { - NeedsCast = true; - Callee = RF; - } - - if (NeedsCast) { - // Ok, just cast the pointer type. - Out << "(("; - printTypeName(Out, I.getCalledValue()->getType()->getPointerElementType(), false, std::make_pair(PAL, I.getCallingConv())); - Out << "*)(void*)"; - } - writeOperand(Callee, ContextCasted); - if (NeedsCast) Out << ')'; + // Handle intrinsic function calls first... 
+ if (Function *F = I.getCalledFunction()) + if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) + if (visitBuiltinCall(I, ID)) + return; - Out << '('; + Value *Callee = I.getCalledValue(); - bool PrintedArg = false; - if (FTy->isVarArg() && !FTy->getNumParams()) { - Out << "0 /*dummy arg*/"; - PrintedArg = true; - } + PointerType *PTy = cast<PointerType>(Callee->getType()); + FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); - unsigned NumDeclaredParams = FTy->getNumParams(); - CallSite CS(&I); - CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); - unsigned ArgNo = 0; - if (isStructRet) { // Skip struct return argument. - ++AI; - ++ArgNo; - } + // If this is a call to a struct-return function, assign to the first + // parameter instead of passing it to the call. - Function *F = I.getCalledFunction(); - if (F) { - StringRef Name = F->getName(); - // emit cast for the first argument to type expected by header prototype - // the jmp_buf type is an array, so the array-to-pointer decay adds the - // strange extra *'s - if (Name == "sigsetjmp") - Out << "*(sigjmp_buf*)"; - else if (Name == "setjmp") - Out << "*(jmp_buf*)"; - } + // CHECK: If AttributeList replaces AttributeSet for CallInst + const AttributeList PAL = I.getAttributes(); + bool hasByVal = I.hasByValArgument(); + bool isStructRet = I.hasStructRetAttr(); + if (isStructRet) { + writeOperandDeref(I.getArgOperand(0)); + Out << " = "; + } - for (; AI != AE; ++AI, ++ArgNo) { - if (PrintedArg) Out << ", "; - if (ArgNo < NumDeclaredParams && - (*AI)->getType() != FTy->getParamType(ArgNo)) { - Out << '('; - printTypeNameUnaligned(Out, FTy->getParamType(ArgNo), - /*isSigned=*/PAL.hasAttribute(ArgNo+1, Attribute::SExt)); - Out << ')'; - } - // Check if the argument is expected to be passed by value. 
- if (I.getAttributes().hasAttribute(ArgNo+1, Attribute::ByVal)) - writeOperandDeref(*AI); - else - writeOperand(*AI, ContextCasted); - PrintedArg = true; - } - Out << ')'; + if (I.isTailCall()) + Out << " /*tail*/ "; + + // If this is an indirect call to a struct return function, we need to cast + // the pointer. Ditto for indirect calls with byval arguments. + bool NeedsCast = + (hasByVal || isStructRet || I.getCallingConv() != CallingConv::C) && + !isa<Function>(Callee); + + // GCC is a real PITA. It does not permit codegening casts of functions to + // function pointers if they are in a call (it generates a trap instruction + // instead!). We work around this by inserting a cast to void* in between + // the function and the function pointer cast. Unfortunately, we can't just + // form the constant expression here, because the folder will immediately + // nuke it. + // + // Note finally, that this is completely unsafe. ANSI C does not guarantee + // that void* and function pointers have the same size. :( To deal with this + // in the common case, we handle casts where the number of arguments passed + // match exactly. + // + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Callee)) + if (CE->isCast()) + if (Function *RF = dyn_cast<Function>(CE->getOperand(0))) { + NeedsCast = true; + Callee = RF; + } + + if (NeedsCast) { + // Ok, just cast the pointer type. + Out << "(("; + printTypeName(Out, I.getCalledValue()->getType()->getPointerElementType(), + false, std::make_pair(PAL, I.getCallingConv())); + Out << "*)(void*)"; + } + writeOperand(Callee, ContextCasted); + if (NeedsCast) + Out << ')'; + + Out << '('; + + bool PrintedArg = false; + if (FTy->isVarArg() && !FTy->getNumParams()) { + Out << "0 /*dummy arg*/"; + PrintedArg = true; + } + + unsigned NumDeclaredParams = FTy->getNumParams(); + CallSite CS(&I); + CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + unsigned ArgNo = 0; + if (isStructRet) { // Skip struct return argument. 
+ ++AI; + ++ArgNo; + } + + Function *F = I.getCalledFunction(); + if (F) { + StringRef Name = F->getName(); + // emit cast for the first argument to type expected by header prototype + // the jmp_buf type is an array, so the array-to-pointer decay adds the + // strange extra *'s + if (Name == "sigsetjmp") + Out << "*(sigjmp_buf*)"; + else if (Name == "setjmp") + Out << "*(jmp_buf*)"; + } + + for (; AI != AE; ++AI, ++ArgNo) { + if (PrintedArg) + Out << ", "; + if (ArgNo < NumDeclaredParams && + (*AI)->getType() != FTy->getParamType(ArgNo)) { + Out << '('; + printTypeNameUnaligned( + Out, FTy->getParamType(ArgNo), + /*isSigned=*/PAL.hasAttribute(ArgNo + 1, Attribute::SExt)); + Out << ')'; + } + // Check if the argument is expected to be passed by value. + if (I.getAttributes().hasAttribute(ArgNo + 1, Attribute::ByVal)) + writeOperandDeref(*AI); + else + writeOperand(*AI, ContextCasted); + PrintedArg = true; + } + Out << ')'; } /// visitBuiltinCall - Handle the call to the specified builtin. Returns true /// if the entire call is handled, return false if it wasn't handled bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID) { - switch (ID) { - default: { + switch (ID) { + default: { #ifndef NDEBUG - errs() << "Unknown LLVM intrinsic! " << I; + errs() << "Unknown LLVM intrinsic! " << I; #endif - llvm_unreachable(0); - return false; - } - - case Intrinsic::dbg_value: - case Intrinsic::dbg_declare: - return true; // ignore these intrinsics - case Intrinsic::vastart: - Out << "0; "; - - Out << "va_start(*(va_list*)"; - writeOperand(I.getArgOperand(0), ContextCasted); - Out << ", "; - // Output the last argument to the enclosing function. 
- if (I.getParent()->getParent()->arg_empty()) - Out << "vararg_dummy_arg"; - else - writeOperand(&*(I.getParent()->getParent()->arg_end() - 1)); - Out << ')'; - return true; - case Intrinsic::vaend: - if (!isa<ConstantPointerNull>(I.getArgOperand(0))) { - Out << "0; va_end(*(va_list*)"; - writeOperand(I.getArgOperand(0), ContextCasted); - Out << ')'; - } else { - Out << "va_end(*(va_list*)0)"; - } - return true; - case Intrinsic::vacopy: - Out << "0; "; - Out << "va_copy(*(va_list*)"; - writeOperand(I.getArgOperand(0), ContextCasted); - Out << ", *(va_list*)"; - writeOperand(I.getArgOperand(1), ContextCasted); - Out << ')'; - return true; - case Intrinsic::returnaddress: - Out << "__builtin_return_address("; - writeOperand(I.getArgOperand(0), ContextCasted); - Out << ')'; - return true; - case Intrinsic::frameaddress: - Out << "__builtin_frame_address("; - writeOperand(I.getArgOperand(0), ContextCasted); - Out << ')'; - return true; - case Intrinsic::setjmp: - Out << "setjmp(*(jmp_buf*)"; - writeOperand(I.getArgOperand(0), ContextCasted); - Out << ')'; - return true; - case Intrinsic::longjmp: - Out << "longjmp(*(jmp_buf*)"; - writeOperand(I.getArgOperand(0), ContextCasted); - Out << ", "; - writeOperand(I.getArgOperand(1), ContextCasted); - Out << ')'; - return true; - case Intrinsic::sigsetjmp: - Out << "sigsetjmp(*(sigjmp_buf*)"; - writeOperand(I.getArgOperand(0), ContextCasted); - Out << ','; - writeOperand(I.getArgOperand(1), ContextCasted); - Out << ')'; - return true; - case Intrinsic::siglongjmp: - Out << "siglongjmp(*(sigjmp_buf*)"; - writeOperand(I.getArgOperand(0), ContextCasted); - Out << ", "; - writeOperand(I.getArgOperand(1), ContextCasted); - Out << ')'; - return true; - case Intrinsic::prefetch: - Out << "LLVM_PREFETCH((const void *)"; - writeOperand(I.getArgOperand(0), ContextCasted); - Out << ", "; - writeOperand(I.getArgOperand(1), ContextCasted); - Out << ", "; - writeOperand(I.getArgOperand(2), ContextCasted); - Out << ")"; - return true; - 
case Intrinsic::stacksave: - // Emit this as: Val = 0; *((void**)&Val) = __builtin_stack_save() - // to work around GCC bugs (see PR1809). - Out << "0; *((void**)&" << GetValueName(&I) - << ") = __builtin_stack_save()"; - return true; - case Intrinsic::x86_sse_cmp_ss: - case Intrinsic::x86_sse_cmp_ps: - case Intrinsic::x86_sse2_cmp_sd: - case Intrinsic::x86_sse2_cmp_pd: - Out << '('; - printTypeName(Out, I.getType()); - Out << ')'; - // Multiple GCC builtins multiplex onto this intrinsic. - switch (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()) { - default: llvm_unreachable("Invalid llvm.x86.sse.cmp!"); - case 0: Out << "__builtin_ia32_cmpeq"; break; - case 1: Out << "__builtin_ia32_cmplt"; break; - case 2: Out << "__builtin_ia32_cmple"; break; - case 3: Out << "__builtin_ia32_cmpunord"; break; - case 4: Out << "__builtin_ia32_cmpneq"; break; - case 5: Out << "__builtin_ia32_cmpnlt"; break; - case 6: Out << "__builtin_ia32_cmpnle"; break; - case 7: Out << "__builtin_ia32_cmpord"; break; - } - if (ID == Intrinsic::x86_sse_cmp_ps || ID == Intrinsic::x86_sse2_cmp_pd) - Out << 'p'; - else - Out << 's'; - if (ID == Intrinsic::x86_sse_cmp_ss || ID == Intrinsic::x86_sse_cmp_ps) - Out << 's'; - else - Out << 'd'; - - Out << "("; - writeOperand(I.getArgOperand(0), ContextCasted); - Out << ", "; - writeOperand(I.getArgOperand(1), ContextCasted); - Out << ")"; - return true; - case Intrinsic::ppc_altivec_lvsl: - Out << '('; - printTypeName(Out, I.getType()); - Out << ')'; - Out << "__builtin_altivec_lvsl(0, (void*)"; - writeOperand(I.getArgOperand(0), ContextCasted); - Out << ")"; - return true; - case Intrinsic::stackprotector: - writeOperandDeref(I.getArgOperand(1)); - Out << " = "; - writeOperand(I.getArgOperand(0), ContextCasted); - return true; - case Intrinsic::uadd_with_overflow: - case Intrinsic::sadd_with_overflow: - case Intrinsic::usub_with_overflow: - case Intrinsic::ssub_with_overflow: - case Intrinsic::umul_with_overflow: - case 
Intrinsic::smul_with_overflow: - case Intrinsic::bswap: - case Intrinsic::ceil: - case Intrinsic::ctlz: - case Intrinsic::ctpop: - case Intrinsic::cttz: - case Intrinsic::fabs: - case Intrinsic::floor: - case Intrinsic::fma: - case Intrinsic::fmuladd: - case Intrinsic::pow: - case Intrinsic::powi: - case Intrinsic::rint: - case Intrinsic::sqrt: - case Intrinsic::trap: - case Intrinsic::trunc: - return false; // these use the normal function call emission - } + llvm_unreachable(0); + return false; + } + + case Intrinsic::dbg_value: + case Intrinsic::dbg_declare: + return true; // ignore these intrinsics + case Intrinsic::vastart: + Out << "0; "; + + Out << "va_start(*(va_list*)"; + writeOperand(I.getArgOperand(0), ContextCasted); + Out << ", "; + // Output the last argument to the enclosing function. + if (I.getParent()->getParent()->arg_empty()) + Out << "vararg_dummy_arg"; + else + writeOperand(&*(I.getParent()->getParent()->arg_end() - 1)); + Out << ')'; + return true; + case Intrinsic::vaend: + if (!isa<ConstantPointerNull>(I.getArgOperand(0))) { + Out << "0; va_end(*(va_list*)"; + writeOperand(I.getArgOperand(0), ContextCasted); + Out << ')'; + } else { + Out << "va_end(*(va_list*)0)"; + } + return true; + case Intrinsic::vacopy: + Out << "0; "; + Out << "va_copy(*(va_list*)"; + writeOperand(I.getArgOperand(0), ContextCasted); + Out << ", *(va_list*)"; + writeOperand(I.getArgOperand(1), ContextCasted); + Out << ')'; + return true; + case Intrinsic::returnaddress: + Out << "__builtin_return_address("; + writeOperand(I.getArgOperand(0), ContextCasted); + Out << ')'; + return true; + case Intrinsic::frameaddress: + Out << "__builtin_frame_address("; + writeOperand(I.getArgOperand(0), ContextCasted); + Out << ')'; + return true; + case Intrinsic::setjmp: + Out << "setjmp(*(jmp_buf*)"; + writeOperand(I.getArgOperand(0), ContextCasted); + Out << ')'; + return true; + case Intrinsic::longjmp: + Out << "longjmp(*(jmp_buf*)"; + writeOperand(I.getArgOperand(0), 
ContextCasted); + Out << ", "; + writeOperand(I.getArgOperand(1), ContextCasted); + Out << ')'; + return true; + case Intrinsic::sigsetjmp: + Out << "sigsetjmp(*(sigjmp_buf*)"; + writeOperand(I.getArgOperand(0), ContextCasted); + Out << ','; + writeOperand(I.getArgOperand(1), ContextCasted); + Out << ')'; + return true; + case Intrinsic::siglongjmp: + Out << "siglongjmp(*(sigjmp_buf*)"; + writeOperand(I.getArgOperand(0), ContextCasted); + Out << ", "; + writeOperand(I.getArgOperand(1), ContextCasted); + Out << ')'; + return true; + case Intrinsic::prefetch: + Out << "LLVM_PREFETCH((const void *)"; + writeOperand(I.getArgOperand(0), ContextCasted); + Out << ", "; + writeOperand(I.getArgOperand(1), ContextCasted); + Out << ", "; + writeOperand(I.getArgOperand(2), ContextCasted); + Out << ")"; + return true; + case Intrinsic::stacksave: + // Emit this as: Val = 0; *((void**)&Val) = __builtin_stack_save() + // to work around GCC bugs (see PR1809). + Out << "0; *((void**)&" << GetValueName(&I) << ") = __builtin_stack_save()"; + return true; + case Intrinsic::x86_sse_cmp_ss: + case Intrinsic::x86_sse_cmp_ps: + case Intrinsic::x86_sse2_cmp_sd: + case Intrinsic::x86_sse2_cmp_pd: + Out << '('; + printTypeName(Out, I.getType()); + Out << ')'; + // Multiple GCC builtins multiplex onto this intrinsic. 
+ switch (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()) { + default: + llvm_unreachable("Invalid llvm.x86.sse.cmp!"); + case 0: + Out << "__builtin_ia32_cmpeq"; + break; + case 1: + Out << "__builtin_ia32_cmplt"; + break; + case 2: + Out << "__builtin_ia32_cmple"; + break; + case 3: + Out << "__builtin_ia32_cmpunord"; + break; + case 4: + Out << "__builtin_ia32_cmpneq"; + break; + case 5: + Out << "__builtin_ia32_cmpnlt"; + break; + case 6: + Out << "__builtin_ia32_cmpnle"; + break; + case 7: + Out << "__builtin_ia32_cmpord"; + break; + } + if (ID == Intrinsic::x86_sse_cmp_ps || ID == Intrinsic::x86_sse2_cmp_pd) + Out << 'p'; + else + Out << 's'; + if (ID == Intrinsic::x86_sse_cmp_ss || ID == Intrinsic::x86_sse_cmp_ps) + Out << 's'; + else + Out << 'd'; + + Out << "("; + writeOperand(I.getArgOperand(0), ContextCasted); + Out << ", "; + writeOperand(I.getArgOperand(1), ContextCasted); + Out << ")"; + return true; + case Intrinsic::ppc_altivec_lvsl: + Out << '('; + printTypeName(Out, I.getType()); + Out << ')'; + Out << "__builtin_altivec_lvsl(0, (void*)"; + writeOperand(I.getArgOperand(0), ContextCasted); + Out << ")"; + return true; + case Intrinsic::stackprotector: + writeOperandDeref(I.getArgOperand(1)); + Out << " = "; + writeOperand(I.getArgOperand(0), ContextCasted); + return true; + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::bswap: + case Intrinsic::ceil: + case Intrinsic::ctlz: + case Intrinsic::ctpop: + case Intrinsic::cttz: + case Intrinsic::fabs: + case Intrinsic::floor: + case Intrinsic::fma: + case Intrinsic::fmuladd: + case Intrinsic::pow: + case Intrinsic::powi: + case Intrinsic::rint: + case Intrinsic::sqrt: + case Intrinsic::trap: + case Intrinsic::trunc: + return false; // these use the normal function call emission + } } -//This converts the 
llvm constraint string to something gcc is expecting. -//TODO: work out platform independent constraints and factor those out +// This converts the llvm constraint string to something gcc is expecting. +// TODO: work out platform independent constraints and factor those out // of the per target tables // handle multiple constraint codes -std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) { - return TargetLowering::AsmOperandInfo(c).ConstraintCode; +std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo &c) { + return TargetLowering::AsmOperandInfo(c).ConstraintCode; #if 0 assert(c.Codes.size() == 1 && "Too many asm constraint codes to handle"); @@ -4923,513 +5358,524 @@ std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) { #endif } -//TODO: import logic from AsmPrinter.cpp +// TODO: import logic from AsmPrinter.cpp static std::string gccifyAsm(std::string asmstr) { - for (std::string::size_type i = 0; i != asmstr.size(); ++i) - if (asmstr[i] == '\n') - asmstr.replace(i, 1, "\\n"); - else if (asmstr[i] == '\t') - asmstr.replace(i, 1, "\\t"); - else if (asmstr[i] == '$') { - if (asmstr[i + 1] == '{') { - std::string::size_type a = asmstr.find_first_of(':', i + 1); - std::string::size_type b = asmstr.find_first_of('}', i + 1); - std::string n = "%" + - asmstr.substr(a + 1, b - a - 1) + - asmstr.substr(i + 2, a - i - 2); - asmstr.replace(i, b - i + 1, n); - i += n.size() - 1; - } else - asmstr.replace(i, 1, "%"); - } - else if (asmstr[i] == '%')//grr - { asmstr.replace(i, 1, "%%"); ++i;} + for (std::string::size_type i = 0; i != asmstr.size(); ++i) + if (asmstr[i] == '\n') + asmstr.replace(i, 1, "\\n"); + else if (asmstr[i] == '\t') + asmstr.replace(i, 1, "\\t"); + else if (asmstr[i] == '$') { + if (asmstr[i + 1] == '{') { + std::string::size_type a = asmstr.find_first_of(':', i + 1); + std::string::size_type b = asmstr.find_first_of('}', i + 1); + std::string n = "%" + asmstr.substr(a + 1, b - a - 1) + + 
asmstr.substr(i + 2, a - i - 2); + asmstr.replace(i, b - i + 1, n); + i += n.size() - 1; + } else + asmstr.replace(i, 1, "%"); + } else if (asmstr[i] == '%') // grr + { + asmstr.replace(i, 1, "%%"); + ++i; + } - return asmstr; + return asmstr; } -//TODO: assumptions about what consume arguments from the call are likely wrong +// TODO: assumptions about what consume arguments from the call are likely wrong // handle communitivity void CWriter::visitInlineAsm(CallInst &CI) { - InlineAsm* as = cast<InlineAsm>(CI.getCalledValue()); - InlineAsm::ConstraintInfoVector Constraints = as->ParseConstraints(); - - std::vector<std::pair<Value*, int> > ResultVals; - if (CI.getType() == Type::getVoidTy(CI.getContext())) - ; - else if (StructType *ST = dyn_cast<StructType>(CI.getType())) { - for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) - ResultVals.push_back(std::make_pair(&CI, (int)i)); - } else { - ResultVals.push_back(std::make_pair(&CI, -1)); - } - - // Fix up the asm string for gcc and emit it. - Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n"; - Out << " :"; - - unsigned ValueCount = 0; - bool IsFirst = true; + InlineAsm *as = cast<InlineAsm>(CI.getCalledValue()); + InlineAsm::ConstraintInfoVector Constraints = as->ParseConstraints(); + + std::vector<std::pair<Value *, int>> ResultVals; + if (CI.getType() == Type::getVoidTy(CI.getContext())) + ; + else if (StructType *ST = dyn_cast<StructType>(CI.getType())) { + for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) + ResultVals.push_back(std::make_pair(&CI, (int)i)); + } else { + ResultVals.push_back(std::make_pair(&CI, -1)); + } - // Convert over all the output constraints. - for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(), - E = Constraints.end(); I != E; ++I) { + // Fix up the asm string for gcc and emit it. 
+ Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n"; + Out << " :"; - if (I->Type != InlineAsm::isOutput) { - ++ValueCount; - continue; // Ignore non-output constraints. - } + unsigned ValueCount = 0; + bool IsFirst = true; - assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); - std::string C = InterpretASMConstraint(*I); - if (C.empty()) continue; + // Convert over all the output constraints. + for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(), + E = Constraints.end(); + I != E; ++I) { - if (!IsFirst) { - Out << ", "; - IsFirst = false; - } + if (I->Type != InlineAsm::isOutput) { + ++ValueCount; + continue; // Ignore non-output constraints. + } - // Unpack the dest. - Value *DestVal; - int DestValNo = -1; + assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); + std::string C = InterpretASMConstraint(*I); + if (C.empty()) + continue; - if (ValueCount < ResultVals.size()) { - DestVal = ResultVals[ValueCount].first; - DestValNo = ResultVals[ValueCount].second; - } else - DestVal = CI.getArgOperand(ValueCount-ResultVals.size()); + if (!IsFirst) { + Out << ", "; + IsFirst = false; + } - if (I->isEarlyClobber) - C = "&"+C; + // Unpack the dest. + Value *DestVal; + int DestValNo = -1; - Out << "\"=" << C << "\"(" << GetValueName(DestVal); - if (DestValNo != -1) - Out << ".field" << DestValNo; // Multiple retvals. - Out << ")"; - ++ValueCount; - } + if (ValueCount < ResultVals.size()) { + DestVal = ResultVals[ValueCount].first; + DestValNo = ResultVals[ValueCount].second; + } else + DestVal = CI.getArgOperand(ValueCount - ResultVals.size()); + if (I->isEarlyClobber) + C = "&" + C; - // Convert over all the input constraints. 
- Out << "\n :"; - IsFirst = true; - ValueCount = 0; - for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(), - E = Constraints.end(); I != E; ++I) { - if (I->Type != InlineAsm::isInput) { - ++ValueCount; - continue; // Ignore non-input constraints. - } + Out << "\"=" << C << "\"(" << GetValueName(DestVal); + if (DestValNo != -1) + Out << ".field" << DestValNo; // Multiple retvals. + Out << ")"; + ++ValueCount; + } - assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); - std::string C = InterpretASMConstraint(*I); - if (C.empty()) continue; + // Convert over all the input constraints. + Out << "\n :"; + IsFirst = true; + ValueCount = 0; + for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(), + E = Constraints.end(); + I != E; ++I) { + if (I->Type != InlineAsm::isInput) { + ++ValueCount; + continue; // Ignore non-input constraints. + } - if (!IsFirst) { - Out << ", "; - IsFirst = false; - } + assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); + std::string C = InterpretASMConstraint(*I); + if (C.empty()) + continue; - assert(ValueCount >= ResultVals.size() && "Input can't refer to result"); - Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size()); + if (!IsFirst) { + Out << ", "; + IsFirst = false; + } - Out << "\"" << C << "\"("; - if (!I->isIndirect) - writeOperand(SrcVal); - else - writeOperandDeref(SrcVal); - Out << ")"; - } + assert(ValueCount >= ResultVals.size() && "Input can't refer to result"); + Value *SrcVal = CI.getArgOperand(ValueCount - ResultVals.size()); - // Convert over the clobber constraints. - IsFirst = true; - for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(), - E = Constraints.end(); I != E; ++I) { - if (I->Type != InlineAsm::isClobber) - continue; // Ignore non-input constraints. 
+ Out << "\"" << C << "\"("; + if (!I->isIndirect) + writeOperand(SrcVal); + else + writeOperandDeref(SrcVal); + Out << ")"; + } - assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); - std::string C = InterpretASMConstraint(*I); - if (C.empty()) continue; + // Convert over the clobber constraints. + IsFirst = true; + for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(), + E = Constraints.end(); + I != E; ++I) { + if (I->Type != InlineAsm::isClobber) + continue; // Ignore non-input constraints. + + assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); + std::string C = InterpretASMConstraint(*I); + if (C.empty()) + continue; - if (!IsFirst) { - Out << ", "; - IsFirst = false; - } + if (!IsFirst) { + Out << ", "; + IsFirst = false; + } - Out << '\"' << C << '"'; - } + Out << '\"' << C << '"'; + } - Out << ")"; + Out << ")"; } void CWriter::visitAllocaInst(AllocaInst &I) { - Out << '('; - printTypeName(Out, I.getType()); - Out << ") alloca(sizeof("; - printTypeName(Out, I.getType()->getElementType()); - if (I.isArrayAllocation()) { - Out << ") * (" ; - writeOperand(I.getArraySize(), ContextCasted); - } - Out << "))"; + Out << '('; + printTypeName(Out, I.getType()); + Out << ") alloca(sizeof("; + printTypeName(Out, I.getType()->getElementType()); + if (I.isArrayAllocation()) { + Out << ") * ("; + writeOperand(I.getArraySize(), ContextCasted); + } + Out << "))"; } void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I, - gep_type_iterator E, bool isArrayType, GetElementPtrInst *GEPI) { - DEBUG(errs() << "Printing GEP\n"); - DEBUG(errs() << "\tPtr: " << *Ptr << "\n"); - DEBUG(errs() << "\tGEPI: " << *GEPI <<"\n"); - // If there are no indices, just print out the pointer. 
- if (I == E) { - DEBUG(errs() << "I==E: Calling writeOperand()\n"); - writeOperand(Ptr); - return; - } + gep_type_iterator E, bool isArrayType, + GetElementPtrInst *GEPI) { + DEBUG(errs() << "Printing GEP\n"); + DEBUG(errs() << "\tPtr: " << *Ptr << "\n"); + DEBUG(errs() << "\tGEPI: " << *GEPI << "\n"); + // If there are no indices, just print out the pointer. + if (I == E) { + DEBUG(errs() << "I==E: Calling writeOperand()\n"); + writeOperand(Ptr); + return; + } - // Find out if the last index is into a vector. If so, we have to print this - // specially. Since vectors can't have elements of indexable type, only the - // last index could possibly be of a vector element. - VectorType *LastIndexIsVector = 0; - { - for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI) - //LastIndexIsVector = dyn_cast<VectorType>(TmpI.getCurTy()); - // CHECK: This change needs thorough testing - LastIndexIsVector = dyn_cast<VectorType>(TmpI.getIndexedType()); - } - Out << "("; - - // If the last index is into a vector, we can't print it as &a[i][j] because - // we can't index into a vector with j in GCC. Instead, emit this as - // (((float*)&a[i])+j) - // TODO: this is no longer true now that we don't represent vectors using gcc-extentions - if (LastIndexIsVector) { - DEBUG(errs() << "LastIndexIsVector\n"); - Out << "(("; - printTypeName(Out, PointerType::getUnqual(LastIndexIsVector->getElementType())); - Out << ")("; - } - bool isArrayAccess = false; + // Find out if the last index is into a vector. If so, we have to print this + // specially. Since vectors can't have elements of indexable type, only the + // last index could possibly be of a vector element. 
+ VectorType *LastIndexIsVector = 0; + { + for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI) + // LastIndexIsVector = dyn_cast<VectorType>(TmpI.getCurTy()); + // CHECK: This change needs thorough testing + LastIndexIsVector = dyn_cast<VectorType>(TmpI.getIndexedType()); + } + Out << "("; + + // If the last index is into a vector, we can't print it as &a[i][j] because + // we can't index into a vector with j in GCC. Instead, emit this as + // (((float*)&a[i])+j) + // TODO: this is no longer true now that we don't represent vectors using + // gcc-extentions + if (LastIndexIsVector) { + DEBUG(errs() << "LastIndexIsVector\n"); + Out << "(("; + printTypeName(Out, + PointerType::getUnqual(LastIndexIsVector->getElementType())); + Out << ")("; + } + bool isArrayAccess = false; - if (GEPStack.size() > 0 && GEPStack.top() == GEPI) { - DEBUG(errs() << "Processing load-specific GEP\n"); - GEPStack.pop(); - isArrayAccess = true; - } else { - DEBUG(errs() << "I'm hereee!\n"); - Out << '&'; - } - DEBUG(errs() << "Here!\n"); - // If the first index is 0 (very typical) we can do a number of - // simplifications to clean up the code. - Value *FirstOp = I.getOperand(); - DEBUG(errs() << "FirstOp: " << *(I.getOperand()) << "\n"); - if (!isa<Constant>(FirstOp) || !cast<Constant>(FirstOp)->isNullValue()) { - DEBUG(errs() << "Calling writeoperand()\n"); - // First index isn't simple, print it the hard way. - writeOperand(Ptr, ContextNormal, isArrayAccess); - } else { - ++I; // Skip the zero index. - DEBUG(errs() << "Skipping zero index\n"); - - // Okay, emit the first operand. If Ptr is something that is already address - // exposed, like a global, avoid emitting (&foo)[0], just emit foo instead. 
- if (isAddressExposed(Ptr)) { - DEBUG(errs() << "Address exposed; calling writeoperandinternal()\n"); - writeOperandInternal(Ptr); - } - //else if (I != E && (I.getCurTy())->isStructTy()) { - // NOTE: This change needs to be tested more - else if (I != E && (I.isStruct()) ) { - DEBUG(errs() << "Not address exposed; is struct type\n"); - // If we didn't already emit the first operand, see if we can print it as - // P->f instead of "P[0].f" - writeOperand(Ptr); - Out << "->field" << cast<ConstantInt>(I.getOperand())->getZExtValue(); - ++I; // eat the struct index as well. - } else { - DEBUG(errs() << "In else; emitting *P\n"); - // Instead of emitting P[0][1], emit (*P)[1], which is more idiomatic. - Out << "(*"; - writeOperand(Ptr); - Out << ")"; - } - } + if (GEPStack.size() > 0 && GEPStack.top() == GEPI) { + DEBUG(errs() << "Processing load-specific GEP\n"); + GEPStack.pop(); + isArrayAccess = true; + } else { + DEBUG(errs() << "I'm hereee!\n"); + Out << '&'; + } + DEBUG(errs() << "Here!\n"); + // If the first index is 0 (very typical) we can do a number of + // simplifications to clean up the code. + Value *FirstOp = I.getOperand(); + DEBUG(errs() << "FirstOp: " << *(I.getOperand()) << "\n"); + if (!isa<Constant>(FirstOp) || !cast<Constant>(FirstOp)->isNullValue()) { + DEBUG(errs() << "Calling writeoperand()\n"); + // First index isn't simple, print it the hard way. + writeOperand(Ptr, ContextNormal, isArrayAccess); + } else { + ++I; // Skip the zero index. + DEBUG(errs() << "Skipping zero index\n"); + + // Okay, emit the first operand. If Ptr is something that is already address + // exposed, like a global, avoid emitting (&foo)[0], just emit foo instead. 
+ if (isAddressExposed(Ptr)) { + DEBUG(errs() << "Address exposed; calling writeoperandinternal()\n"); + writeOperandInternal(Ptr); + } + // else if (I != E && (I.getCurTy())->isStructTy()) { + // NOTE: This change needs to be tested more + else if (I != E && (I.isStruct())) { + DEBUG(errs() << "Not address exposed; is struct type\n"); + // If we didn't already emit the first operand, see if we can print it as + // P->f instead of "P[0].f" + writeOperand(Ptr); + Out << "->field" << cast<ConstantInt>(I.getOperand())->getZExtValue(); + ++I; // eat the struct index as well. + } else { + DEBUG(errs() << "In else; emitting *P\n"); + // Instead of emitting P[0][1], emit (*P)[1], which is more idiomatic. + Out << "(*"; + writeOperand(Ptr); + Out << ")"; + } + } - Type *Agg = GEPI->getSourceElementType(); - unsigned CurIdx = 1; - for (; I != E; ++CurIdx, ++I) { - assert(I.getOperand()->getType()->isIntegerTy()); // TODO: indexing a Vector with a Vector is valid, but we don't support it here - DEBUG(errs() << "Type: " << *Agg << "; operand: " << *(I.getOperand()) << "\n"); - if ((Agg->isStructTy())){ - DEBUG(errs() << "Found a struct\n"); - Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue(); - } else if (Agg->isArrayTy()) { - DEBUG(errs() << "Found an array!\n"); - Out << ".array["; - writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr); - Out << ']'; - } else if (!Agg->isVectorTy()) { - DEBUG(errs() << "Not a vector!\n"); - Out << '['; - writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr); - Out << ']'; - } else { - DEBUG(errs() << "In else!\n"); - // If the last index is into a vector, then print it out as "+j)". This - // works with the 'LastIndexIsVector' code above. - if (isa<Constant>(I.getOperand()) && - cast<Constant>(I.getOperand())->isNullValue()) { - Out << "))"; // avoid "+0". 
- } else { - Out << ")+("; - writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr); - Out << "))"; - } - } - CompositeType *CT = dyn_cast<CompositeType>(Agg); - if (!CT || CT->isPointerTy()) - { - DEBUG(errs() << "Something wrong!!\n"); - break; - } - Value* Index = GEPI->getOperand(CurIdx); - if (!CT->indexValid(Index)) - if (!CT || CT->isPointerTy()) - { - DEBUG(errs() << "Something wrong 2!!\n"); - break; - } - Agg = CT->getTypeAtIndex(Index); - } - Out << ")"; - DEBUG(errs() << "Leaving printGEPExpression\n"); - } + Type *Agg = GEPI->getSourceElementType(); + unsigned CurIdx = 1; + for (; I != E; ++CurIdx, ++I) { + assert(I.getOperand() + ->getType() + ->isIntegerTy()); // TODO: indexing a Vector with a Vector is + // valid, but we don't support it here + DEBUG(errs() << "Type: " << *Agg << "; operand: " << *(I.getOperand()) + << "\n"); + if ((Agg->isStructTy())) { + DEBUG(errs() << "Found a struct\n"); + Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue(); + } else if (Agg->isArrayTy()) { + DEBUG(errs() << "Found an array!\n"); + Out << ".array["; + writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr); + Out << ']'; + } else if (!Agg->isVectorTy()) { + DEBUG(errs() << "Not a vector!\n"); + Out << '['; + writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr); + Out << ']'; + } else { + DEBUG(errs() << "In else!\n"); + // If the last index is into a vector, then print it out as "+j)". This + // works with the 'LastIndexIsVector' code above. + if (isa<Constant>(I.getOperand()) && + cast<Constant>(I.getOperand())->isNullValue()) { + Out << "))"; // avoid "+0". 
+ } else { + Out << ")+("; + writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr); + Out << "))"; + } + } + CompositeType *CT = dyn_cast<CompositeType>(Agg); + if (!CT || CT->isPointerTy()) { + DEBUG(errs() << "Something wrong!!\n"); + break; + } + Value *Index = GEPI->getOperand(CurIdx); + if (!CT->indexValid(Index)) + if (!CT || CT->isPointerTy()) { + DEBUG(errs() << "Something wrong 2!!\n"); + break; + } + Agg = CT->getTypeAtIndex(Index); + } + Out << ")"; + DEBUG(errs() << "Leaving printGEPExpression\n"); +} - void CWriter::writeMemoryAccess(Value *Operand, Type *OperandType, - bool IsVolatile, unsigned Alignment /*bytes*/) { - DEBUG(errs() << *OperandType << "; " << *Operand << "\n"); - bool arrayAccess = false; - if(isa<GetElementPtrInst>(Operand)) { - DEBUG(errs() << "ISA Get Element Pointer!\n"); - arrayAccess = true; - GEPStack.push(dyn_cast<GetElementPtrInst>(Operand)); - } - // if (isAddressExposed(Operand)) { - // DEBUG(errs() << "Is address exposed!!\n"); - // writeOperandInternal(Operand); - // return; - // } - - bool IsUnaligned = Alignment && - Alignment < TD->getABITypeAlignment(OperandType); - if (!arrayAccess) { - if (!IsUnaligned) - Out << '*'; - - else if (IsUnaligned) { - Out << "__UNALIGNED_LOAD__("; - printTypeNameUnaligned(Out, OperandType, false); - if (IsVolatile) Out << " volatile"; - Out << ", " << Alignment << ", "; - } - - else if (IsVolatile) { - Out << "("; - printTypeName(Out, OperandType, false); - Out << "volatile"; - Out << "*)"; - } - } +void CWriter::writeMemoryAccess(Value *Operand, Type *OperandType, + bool IsVolatile, unsigned Alignment /*bytes*/) { + DEBUG(errs() << *OperandType << "; " << *Operand << "\n"); + bool arrayAccess = false; + if (isa<GetElementPtrInst>(Operand)) { + DEBUG(errs() << "ISA Get Element Pointer!\n"); + arrayAccess = true; + GEPStack.push(dyn_cast<GetElementPtrInst>(Operand)); + } + // if (isAddressExposed(Operand)) { + // DEBUG(errs() << "Is address exposed!!\n"); + // 
writeOperandInternal(Operand); + // return; + // } - writeOperand(Operand,ContextNormal, arrayAccess ); + bool IsUnaligned = + Alignment && Alignment < TD->getABITypeAlignment(OperandType); + if (!arrayAccess) { + if (!IsUnaligned) + Out << '*'; + + else if (IsUnaligned) { + Out << "__UNALIGNED_LOAD__("; + printTypeNameUnaligned(Out, OperandType, false); + if (IsVolatile) + Out << " volatile"; + Out << ", " << Alignment << ", "; + } - if (IsUnaligned) { - Out << ")"; - } - } + else if (IsVolatile) { + Out << "("; + printTypeName(Out, OperandType, false); + Out << "volatile"; + Out << "*)"; + } + } - void CWriter::visitLoadInst(LoadInst &I) { - DEBUG(errs() << "Visiting Load instruction!\n"); - DEBUG(errs() << "Visiting load: " << I << "\n"); - writeMemoryAccess(I.getOperand(0), I.getType(), I.isVolatile(), - I.getAlignment()); + writeOperand(Operand, ContextNormal, arrayAccess); - } + if (IsUnaligned) { + Out << ")"; + } +} - void CWriter::visitStoreInst(StoreInst &I) { - DEBUG(errs() << "Visiting store instruction!\n"); - writeMemoryAccess(I.getPointerOperand(), I.getOperand(0)->getType(), - I.isVolatile(), I.getAlignment()); - Out << " = "; - Value *Operand = I.getOperand(0); - unsigned BitMask = 0; - if (IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType())) - if (!ITy->isPowerOf2ByteWidth()) - // We have a bit width that doesn't match an even power-of-2 byte - // size. Consequently we must & the value with the type's bit mask - BitMask = ITy->getBitMask(); - if (BitMask) - Out << "(("; - writeOperand(Operand, BitMask ? 
ContextNormal : ContextCasted); - if (BitMask) - Out << ") & " << BitMask << ")"; - } +void CWriter::visitLoadInst(LoadInst &I) { + DEBUG(errs() << "Visiting Load instruction!\n"); + DEBUG(errs() << "Visiting load: " << I << "\n"); + writeMemoryAccess(I.getOperand(0), I.getType(), I.isVolatile(), + I.getAlignment()); +} - void CWriter::visitGetElementPtrInst(GetElementPtrInst &I) { - DEBUG(errs() <<"Visiting GEP: " << I << "\n"); - printGEPExpression(I.getPointerOperand(), gep_type_begin(I), - gep_type_end(I), I.getSourceElementType()->isArrayTy(), &I); - } +void CWriter::visitStoreInst(StoreInst &I) { + DEBUG(errs() << "Visiting store instruction!\n"); + writeMemoryAccess(I.getPointerOperand(), I.getOperand(0)->getType(), + I.isVolatile(), I.getAlignment()); + Out << " = "; + Value *Operand = I.getOperand(0); + unsigned BitMask = 0; + if (IntegerType *ITy = dyn_cast<IntegerType>(Operand->getType())) + if (!ITy->isPowerOf2ByteWidth()) + // We have a bit width that doesn't match an even power-of-2 byte + // size. Consequently we must & the value with the type's bit mask + BitMask = ITy->getBitMask(); + if (BitMask) + Out << "(("; + writeOperand(Operand, BitMask ? ContextNormal : ContextCasted); + if (BitMask) + Out << ") & " << BitMask << ")"; +} - void CWriter::visitVAArgInst(VAArgInst &I) { - Out << "va_arg(*(va_list*)"; - writeOperand(I.getOperand(0), ContextCasted); - Out << ", "; - printTypeName(Out, I.getType()); - Out << ");\n "; - } +void CWriter::visitGetElementPtrInst(GetElementPtrInst &I) { + DEBUG(errs() << "Visiting GEP: " << I << "\n"); + printGEPExpression(I.getPointerOperand(), gep_type_begin(I), gep_type_end(I), + I.getSourceElementType()->isArrayTy(), &I); +} - void CWriter::visitInsertElementInst(InsertElementInst &I) { - // Start by copying the entire aggregate value into the result variable. 
- writeOperand(I.getOperand(0)); - Type *EltTy = I.getType()->getElementType(); - assert(I.getOperand(1)->getType() == EltTy); - if (isEmptyType(EltTy)) return; - - // Then do the insert to update the field. - Out << ";\n "; - Out << GetValueName(&I) << ".vector["; - writeOperand(I.getOperand(2)); - Out << "] = "; - writeOperand(I.getOperand(1), ContextCasted); - } +void CWriter::visitVAArgInst(VAArgInst &I) { + Out << "va_arg(*(va_list*)"; + writeOperand(I.getOperand(0), ContextCasted); + Out << ", "; + printTypeName(Out, I.getType()); + Out << ");\n "; +} - void CWriter::visitExtractElementInst(ExtractElementInst &I) { - assert(!isEmptyType(I.getType())); - if (isa<UndefValue>(I.getOperand(0))) { - Out << "("; - printTypeName(Out, I.getType()); - Out << ") 0/*UNDEF*/"; - } else { - Out << "("; - writeOperand(I.getOperand(0)); - Out << ").vector["; - writeOperand(I.getOperand(1)); - Out << "]"; - } - } +void CWriter::visitInsertElementInst(InsertElementInst &I) { + // Start by copying the entire aggregate value into the result variable. 
+ writeOperand(I.getOperand(0)); + Type *EltTy = I.getType()->getElementType(); + assert(I.getOperand(1)->getType() == EltTy); + if (isEmptyType(EltTy)) + return; - // <result> = shufflevector <n x <ty>> <v1>, <n x <ty>> <v2>, <m x i32> <mask> - // ; yields <m x <ty>> - void CWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) { - VectorType *VT = SVI.getType(); - Type *EltTy = VT->getElementType(); - VectorType *InputVT = cast<VectorType>(SVI.getOperand(0)->getType()); - assert(!isEmptyType(VT)); - assert(InputVT->getElementType() == VT->getElementType()); - - CtorDeclTypes.insert(VT); - Out << "llvm_ctor_"; - printTypeString(Out, VT, false); - Out << "("; - - Constant *Zero = Constant::getNullValue(EltTy); - unsigned NumElts = VT->getNumElements(); - unsigned NumInputElts = InputVT->getNumElements(); // n - for (unsigned i = 0; i != NumElts; ++i) { - if (i) Out << ", "; - int SrcVal = SVI.getMaskValue(i); - if ((unsigned)SrcVal >= NumInputElts * 2) { - Out << "/*undef*/"; - printConstant(Zero, ContextCasted); - } else { - // If SrcVal belongs [0, n - 1], it extracts value from <v1> - // If SrcVal belongs [n, 2 * n - 1], it extracts value from <v2> - // In C++, the value false is converted to zero and the value true is - // converted to one - Value *Op = SVI.getOperand((unsigned)SrcVal >= NumInputElts); - if (isa<Instruction>(Op)) { - // Do an extractelement of this value from the appropriate input. - Out << "("; - writeOperand(Op); - Out << ").vector["; - Out << ((unsigned)SrcVal >= NumInputElts ? SrcVal - NumInputElts : SrcVal); - Out << "]"; - } else if (isa<ConstantAggregateZero>(Op) || isa<UndefValue>(Op)) { - printConstant(Zero, ContextCasted); - } else { - printConstant(cast<ConstantVector>(Op)->getOperand(SrcVal & - (NumElts-1)), - ContextNormal); - } - } - } - Out << ")"; - } + // Then do the insert to update the field. 
+ Out << ";\n "; + Out << GetValueName(&I) << ".vector["; + writeOperand(I.getOperand(2)); + Out << "] = "; + writeOperand(I.getOperand(1), ContextCasted); +} - void CWriter::visitInsertValueInst(InsertValueInst &IVI) { - // Start by copying the entire aggregate value into the result variable. - writeOperand(IVI.getOperand(0)); - Type *EltTy = IVI.getOperand(1)->getType(); - if (isEmptyType(EltTy)) return; - - // Then do the insert to update the field. - Out << ";\n "; - Out << GetValueName(&IVI); - for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end(); - i != e; ++i) { - Type *IndexedTy = - ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), - makeArrayRef(b, i)); - assert(IndexedTy); - if (IndexedTy->isArrayTy()) - Out << ".array[" << *i << "]"; - else - Out << ".field" << *i; - } - Out << " = "; - writeOperand(IVI.getOperand(1), ContextCasted); - } +void CWriter::visitExtractElementInst(ExtractElementInst &I) { + assert(!isEmptyType(I.getType())); + if (isa<UndefValue>(I.getOperand(0))) { + Out << "("; + printTypeName(Out, I.getType()); + Out << ") 0/*UNDEF*/"; + } else { + Out << "("; + writeOperand(I.getOperand(0)); + Out << ").vector["; + writeOperand(I.getOperand(1)); + Out << "]"; + } +} - void CWriter::visitExtractValueInst(ExtractValueInst &EVI) { - Out << "("; - if (isa<UndefValue>(EVI.getOperand(0))) { - Out << "("; - printTypeName(Out, EVI.getType()); - Out << ") 0/*UNDEF*/"; - } else { - writeOperand(EVI.getOperand(0)); - for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end(); - i != e; ++i) { - Type *IndexedTy = - ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(), - makeArrayRef(b, i)); - if (IndexedTy->isArrayTy()) - Out << ".array[" << *i << "]"; - else - Out << ".field" << *i; - } - } - Out << ")"; - } +// <result> = shufflevector <n x <ty>> <v1>, <n x <ty>> <v2>, <m x i32> <mask> +// ; yields <m x <ty>> +void CWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) { + VectorType *VT = 
SVI.getType(); + Type *EltTy = VT->getElementType(); + VectorType *InputVT = cast<VectorType>(SVI.getOperand(0)->getType()); + assert(!isEmptyType(VT)); + assert(InputVT->getElementType() == VT->getElementType()); + + CtorDeclTypes.insert(VT); + Out << "llvm_ctor_"; + printTypeString(Out, VT, false); + Out << "("; + + Constant *Zero = Constant::getNullValue(EltTy); + unsigned NumElts = VT->getNumElements(); + unsigned NumInputElts = InputVT->getNumElements(); // n + for (unsigned i = 0; i != NumElts; ++i) { + if (i) + Out << ", "; + int SrcVal = SVI.getMaskValue(i); + if ((unsigned)SrcVal >= NumInputElts * 2) { + Out << "/*undef*/"; + printConstant(Zero, ContextCasted); + } else { + // If SrcVal belongs [0, n - 1], it extracts value from <v1> + // If SrcVal belongs [n, 2 * n - 1], it extracts value from <v2> + // In C++, the value false is converted to zero and the value true is + // converted to one + Value *Op = SVI.getOperand((unsigned)SrcVal >= NumInputElts); + if (isa<Instruction>(Op)) { + // Do an extractelement of this value from the appropriate input. + Out << "("; + writeOperand(Op); + Out << ").vector["; + Out << ((unsigned)SrcVal >= NumInputElts ? SrcVal - NumInputElts + : SrcVal); + Out << "]"; + } else if (isa<ConstantAggregateZero>(Op) || isa<UndefValue>(Op)) { + printConstant(Zero, ContextCasted); + } else { + printConstant( + cast<ConstantVector>(Op)->getOperand(SrcVal & (NumElts - 1)), + ContextNormal); + } + } + } + Out << ")"; +} + +void CWriter::visitInsertValueInst(InsertValueInst &IVI) { + // Start by copying the entire aggregate value into the result variable. + writeOperand(IVI.getOperand(0)); + Type *EltTy = IVI.getOperand(1)->getType(); + if (isEmptyType(EltTy)) + return; + + // Then do the insert to update the field. 
+ Out << ";\n "; + Out << GetValueName(&IVI); + for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end(); i != e; + ++i) { + Type *IndexedTy = ExtractValueInst::getIndexedType( + IVI.getOperand(0)->getType(), makeArrayRef(b, i)); + assert(IndexedTy); + if (IndexedTy->isArrayTy()) + Out << ".array[" << *i << "]"; + else + Out << ".field" << *i; + } + Out << " = "; + writeOperand(IVI.getOperand(1), ContextCasted); +} - //===----------------------------------------------------------------------===// - // External Interface declaration - //===----------------------------------------------------------------------===// +void CWriter::visitExtractValueInst(ExtractValueInst &EVI) { + Out << "("; + if (isa<UndefValue>(EVI.getOperand(0))) { + Out << "("; + printTypeName(Out, EVI.getType()); + Out << ") 0/*UNDEF*/"; + } else { + writeOperand(EVI.getOperand(0)); + for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end(); + i != e; ++i) { + Type *IndexedTy = ExtractValueInst::getIndexedType( + EVI.getOperand(0)->getType(), makeArrayRef(b, i)); + if (IndexedTy->isArrayTy()) + Out << ".array[" << *i << "]"; + else + Out << ".field" << *i; + } + } + Out << ")"; +} - bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - raw_pwrite_stream &Out, - raw_pwrite_stream *Out2, - CodeGenFileType FileType, - bool DisableVerify, - MachineModuleInfo *MMI){ +//===----------------------------------------------------------------------===// +// External Interface declaration +//===----------------------------------------------------------------------===// - if (FileType != TargetMachine::CGFT_AssemblyFile) return true; +bool CTargetMachine::addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &Out, raw_pwrite_stream *Out2, + CodeGenFileType FileType, bool DisableVerify, MachineModuleInfo *MMI) { - PM.add(createGCLoweringPass()); - PM.add(createLowerInvokePass()); - PM.add(createCFGSimplificationPass()); // clean up after lower invoke. 
- PM.add(new CWriter(Out)); - return false; - } + if (FileType != TargetMachine::CGFT_AssemblyFile) + return true; + + PM.add(createGCLoweringPass()); + PM.add(createLowerInvokePass()); + PM.add(createCFGSimplificationPass()); // clean up after lower invoke. + PM.add(new CWriter(Out)); + return false; +} diff --git a/hpvm/projects/llvm-cbe/test/APInt-C.cpp b/hpvm/projects/llvm-cbe/test/APInt-C.cpp index c44440985a0b50a57bd25e1995d39cd904ec32c5..d37b2a4f799fb28cba55d85bb3048b189885a357 100644 --- a/hpvm/projects/llvm-cbe/test/APInt-C.cpp +++ b/hpvm/projects/llvm-cbe/test/APInt-C.cpp @@ -22,12 +22,12 @@ inline uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align, #define CREATE(s) \ APInt s; \ if ((numbits % integerPartWidth) != 0) { \ - /* use LLT_ALIGN to round the memory area up to the nearest \ \ + /* use LLT_ALIGN to round the memory area up to the nearest \ \ \ \ * integerPart-sized chunk */ \ unsigned nbytes = \ RoundUpToAlignment(numbits, integerPartWidth) / host_char_bit; \ integerPart *data_a64 = (integerPart *)alloca(nbytes); \ - /* TODO: this memcpy assumes little-endian, \ for big-endian, need to \ + /* TODO: this memcpy assumes little-endian, \ for big-endian, need to \ \ \ * align the copy to the other end */ \ memcpy(data_a64, p##s, \ RoundUpToAlignment(numbits, host_char_bit) / host_char_bit); \ diff --git a/hpvm/projects/llvm-cbe/tools/llvm-cbe/llvm-cbe.cpp~ b/hpvm/projects/llvm-cbe/tools/llvm-cbe/llvm-cbe.cpp~ index a86a11ce6fcc144055b168739b4de0110b05ae0c..79125a86ec523a4674b222ea1263735aed93765f 100644 --- a/hpvm/projects/llvm-cbe/tools/llvm-cbe/llvm-cbe.cpp~ +++ b/hpvm/projects/llvm-cbe/tools/llvm-cbe/llvm-cbe.cpp~ @@ -61,19 +61,19 @@ extern "C" void LLVMInitializeCBackendTargetMC(); // and back-end code generation options are specified with the target machine. 
// static cl::opt<std::string> -InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-")); + InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-")); -static cl::opt<std::string> -OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename")); +static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"), + cl::value_desc("filename")); static cl::opt<unsigned> -TimeCompilations("time-compilations", cl::Hidden, cl::init(1u), - cl::value_desc("N"), - cl::desc("Repeat compilation N times for timing")); + TimeCompilations("time-compilations", cl::Hidden, cl::init(1u), + cl::value_desc("N"), + cl::desc("Repeat compilation N times for timing")); static cl::opt<bool> -NoIntegratedAssembler("no-integrated-as", cl::Hidden, - cl::desc("Disable integrated assembler")); + NoIntegratedAssembler("no-integrated-as", cl::Hidden, + cl::desc("Disable integrated assembler")); static cl::opt<bool> PreserveComments("preserve-as-comments", cl::Hidden, @@ -82,21 +82,20 @@ static cl::opt<bool> // Determine optimization level. static cl::opt<char> -OptLevel("O", - cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] " - "(default = '-O2')"), - cl::Prefix, - cl::ZeroOrMore, - cl::init(' ')); + OptLevel("O", + cl::desc("Optimization level. 
[-O0, -O1, -O2, or -O3] " + "(default = '-O2')"), + cl::Prefix, cl::ZeroOrMore, cl::init(' ')); static cl::opt<std::string> -TargetTriple("mtriple", cl::desc("Override target triple for module")); + TargetTriple("mtriple", cl::desc("Override target triple for module")); static cl::opt<bool> NoVerify("disable-verify", cl::Hidden, cl::desc("Do not verify input module")); -static cl::opt<bool> DisableSimplifyLibCalls("disable-simplify-libcalls", - cl::desc("Disable simplify-libcalls")); +static cl::opt<bool> + DisableSimplifyLibCalls("disable-simplify-libcalls", + cl::desc("Disable simplify-libcalls")); static cl::opt<bool> ShowMCEncoding("show-mc-encoding", cl::Hidden, cl::desc("Show encoding in .s output")); @@ -120,14 +119,13 @@ static cl::opt<bool> DiscardValueNames( cl::desc("Discard names from Value (other than GlobalValue)."), cl::init(false), cl::Hidden); -static cl::opt<std::string> StopAfter("stop-after", - cl::desc("Stop compilation after a specific pass"), - cl::value_desc("pass-name"), - cl::init("")); -static cl::opt<std::string> StartAfter("start-after", - cl::desc("Resume compilation after a specific pass"), - cl::value_desc("pass-name"), - cl::init("")); +static cl::opt<std::string> + StopAfter("stop-after", cl::desc("Stop compilation after a specific pass"), + cl::value_desc("pass-name"), cl::init("")); +static cl::opt<std::string> + StartAfter("start-after", + cl::desc("Resume compilation after a specific pass"), + cl::value_desc("pass-name"), cl::init("")); namespace { static ManagedStatic<std::vector<std::string>> RunPassNames; @@ -142,7 +140,7 @@ struct RunPassOption { RunPassNames->push_back(PassName); } }; -} +} // namespace static RunPassOption RunPassOpt; @@ -153,9 +151,9 @@ static cl::opt<RunPassOption, true, cl::parser<std::string>> RunPass( static int compileModule(char **, LLVMContext &); -static std::unique_ptr<tool_output_file> -GetOutputStream(const char *TargetName, Triple::OSType OS, - const char *ProgName) { +static 
std::unique_ptr<tool_output_file> GetOutputStream(const char *TargetName, + Triple::OSType OS, + const char *ProgName) { // If we don't yet have an output filename, make one. if (OutputFilename.empty()) { if (InputFilename == "-") @@ -175,7 +173,7 @@ GetOutputStream(const char *TargetName, Triple::OSType OS, if (TargetName[0] == 'c') { if (TargetName[1] == 0) OutputFilename += ".cl"; -// OutputFilename += ".cbe.c"; + // OutputFilename += ".cbe.c"; else if (TargetName[1] == 'p' && TargetName[2] == 'p') OutputFilename += ".cpp"; else @@ -212,8 +210,8 @@ GetOutputStream(const char *TargetName, Triple::OSType OS, sys::fs::OpenFlags OpenFlags = sys::fs::F_None; if (!Binary) OpenFlags |= sys::fs::F_Text; - auto FDOut = llvm::make_unique<tool_output_file>(OutputFilename, EC, - OpenFlags); + auto FDOut = + llvm::make_unique<tool_output_file>(OutputFilename, EC, OpenFlags); if (EC) { errs() << EC.message() << '\n'; return nullptr; @@ -243,7 +241,7 @@ int main(int argc, char **argv) { EnableDebugBuffering = true; LLVMContext Context; - llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. + llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. // Initialize targets first, so that --version shows registered targets. InitializeAllTargets(); @@ -267,7 +265,7 @@ int main(int argc, char **argv) { initializeScalarEvolutionWrapperPassPass(*Registry); initializeDominatorTreeWrapperPassPass(*Registry); initializeAssumptionCacheTrackerPass(*Registry); - //initializeUnreachableBlockElimLegacyPassPass(*Registry); + // initializeUnreachableBlockElimLegacyPassPass(*Registry); // Register the target printer for --version. 
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); @@ -288,8 +286,8 @@ int main(int argc, char **argv) { return 0; } -static bool addPass(PassManagerBase &PM, const char *argv0, - StringRef PassName, TargetPassConfig &TPC) { +static bool addPass(PassManagerBase &PM, const char *argv0, StringRef PassName, + TargetPassConfig &TPC) { if (PassName == "none") return false; @@ -323,17 +321,17 @@ static int compileModule(char **argv, LLVMContext &Context) { std::unique_ptr<MIRParser> MIR; Triple TheTriple; - bool SkipModule = MCPU == "help" || - (!MAttrs.empty() && MAttrs.front() == "help"); + bool SkipModule = + MCPU == "help" || (!MAttrs.empty() && MAttrs.front() == "help"); // If user just wants to list available options, skip module loading if (!SkipModule) { - //if (StringRef(InputFilename).endswith_lower(".mir")) { - //MIR = createMIRParserFromFile(InputFilename, Err, Context); - //if (MIR) - //M = MIR->parseLLVMModule(); + // if (StringRef(InputFilename).endswith_lower(".mir")) { + // MIR = createMIRParserFromFile(InputFilename, Err, Context); + // if (MIR) + // M = MIR->parseLLVMModule(); //} else - M = parseIRFile(InputFilename, Err, Context); + M = parseIRFile(InputFilename, Err, Context); if (!M) { Err.print(argv[0], errs()); return 1; @@ -361,9 +359,9 @@ static int compileModule(char **argv, LLVMContext &Context) { // Get the target specific parser. 
std::string Error; // Override MArch - MArch = "c"; //FIX ME - const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple, - Error); + MArch = "c"; // FIX ME + const Target *TheTarget = + TargetRegistry::lookupTarget(MArch, TheTriple, Error); if (!TheTarget) { errs() << argv[0] << ": " << Error; return 1; @@ -376,11 +374,20 @@ static int compileModule(char **argv, LLVMContext &Context) { default: errs() << argv[0] << ": invalid optimization level.\n"; return 1; - case ' ': break; - case '0': OLvl = CodeGenOpt::None; break; - case '1': OLvl = CodeGenOpt::Less; break; - case '2': OLvl = CodeGenOpt::Default; break; - case '3': OLvl = CodeGenOpt::Aggressive; break; + case ' ': + break; + case '0': + OLvl = CodeGenOpt::None; + break; + case '1': + OLvl = CodeGenOpt::Less; + break; + case '2': + OLvl = CodeGenOpt::Default; + break; + case '3': + OLvl = CodeGenOpt::Aggressive; + break; } TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); @@ -390,7 +397,7 @@ static int compileModule(char **argv, LLVMContext &Context) { Options.MCOptions.AsmVerbose = AsmVerbose; Options.MCOptions.PreserveAsmComments = PreserveComments; -// std::unique_ptr<TargetMachine> Target( + // std::unique_ptr<TargetMachine> Target( TargetMachine *Target( TheTarget->createTargetMachine(TheTriple.getTriple(), CPUStr, FeaturesStr, Options, getRelocModel(), CMModel, OLvl)); @@ -410,7 +417,8 @@ static int compileModule(char **argv, LLVMContext &Context) { // Figure out where we are going to send the output. std::unique_ptr<tool_output_file> Out = GetOutputStream(TheTarget->getName(), TheTriple.getOS(), argv[0]); - if (!Out) return 1; + if (!Out) + return 1; // Build up all of the passes that we want to do to the module. 
legacy::PassManager PM; @@ -433,7 +441,7 @@ static int compileModule(char **argv, LLVMContext &Context) { if (RelaxAll.getNumOccurrences() > 0 && FileType != TargetMachine::CGFT_ObjectFile) errs() << argv[0] - << ": warning: ignoring -mc-relax-all because filetype != obj"; + << ": warning: ignoring -mc-relax-all because filetype != obj"; { raw_pwrite_stream *OS = &Out->os(); @@ -455,24 +463,25 @@ static int compileModule(char **argv, LLVMContext &Context) { const PassRegistry *PR = PassRegistry::getPassRegistry(); if (!RunPassNames->empty()) { if (!StartAfter.empty() || !StopAfter.empty()) { - errs() << argv[0] << ": start-after and/or stop-after passes are " - "redundant when run-pass is specified.\n"; + errs() << argv[0] + << ": start-after and/or stop-after passes are " + "redundant when run-pass is specified.\n"; return 1; } if (!MIR) { errs() << argv[0] << ": run-pass needs a .mir input.\n"; return 1; } - LLVMTargetMachine *LLVMTM = static_cast<LLVMTargetMachine*>(Target); + LLVMTargetMachine *LLVMTM = static_cast<LLVMTargetMachine *>(Target); TargetPassConfig *TPC = LLVMTM->createPassConfig(PM); PM.add(TPC); - -// LLVMTM.addMachineModuleInfo(PM); -// LLVMTM.addMachineFunctionAnalysis(PM, MIR.get()); + + // LLVMTM.addMachineModuleInfo(PM); + // LLVMTM.addMachineFunctionAnalysis(PM, MIR.get()); MachineModuleInfo *MMI = new MachineModuleInfo(LLVMTM); MMI->setMachineFunctionInitializer(MIR.get()); PM.add(MMI); - + TPC->printAndVerify(""); for (const std::string &RunPassName : *RunPassNames) { @@ -554,4 +563,4 @@ static int compileModule(char **argv, LLVMContext &Context) { Out->keep(); return 0; -} \ No newline at end of file +} \ No newline at end of file diff --git a/hpvm/projects/visc-rt/CMakeLists.txt b/hpvm/projects/visc-rt/CMakeLists.txt deleted file mode 100644 index 0395624253e4bc2b62e9eca51bd98a1a6a86436e..0000000000000000000000000000000000000000 --- a/hpvm/projects/visc-rt/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -add_definitions(-DNUM_CORES=8) - 
-SET(CMAKE_C_COMPILER ${CMAKE_BINARY_DIR}/bin/clang) -SET(CMAKE_CXX_COMPILER ${CMAKE_BINARY_DIR}/bin/clang++) - -add_llvm_library(visc-rt.ll visc-rt.cpp - - DEPENDS - clang - ) - - -target_compile_options(visc-rt.ll PUBLIC -flto ) -target_compile_options(visc-rt.ll PUBLIC -std=c++11) - -add_custom_target(visc-rt.cpp.o ALL - COMMAND ar -x ${CMAKE_BINARY_DIR}/lib/libvisc-rt.ll.a - COMMAND mv ${CMAKE_BINARY_DIR}/tools/hpvm/projects/visc-rt/visc-rt.cpp.o ${CMAKE_BINARY_DIR}/tools/hpvm/projects/visc-rt/visc-rt.bc - ) - -add_dependencies(visc-rt.cpp.o visc-rt.ll) diff --git a/hpvm/test/CTestSuite/Makefile b/hpvm/test/CTestSuite/Makefile index 226a83287d743360d9cd64a7c57e864871829b0b..1169e4e896a861975ac0562ebff8b208828bbf89 100644 --- a/hpvm/test/CTestSuite/Makefile +++ b/hpvm/test/CTestSuite/Makefile @@ -9,7 +9,7 @@ LLVM_CC:=$(LLVM_INSTALL)/bin/clang LLVM_OPT:=$(LLVM_INSTALL)/bin/opt BUILD_DIR:=build -all: $(BUILD_DIR) $(HOST:%=$(BUILD_DIR)/%.ll) $(HOST:%=$(BUILD_DIR)/%.visc.ll) +all: $(BUILD_DIR) $(HOST:%=$(BUILD_DIR)/%.ll) $(HOST:%=$(BUILD_DIR)/%.hpvm.ll) $(BUILD_DIR): mkdir -p $(BUILD_DIR) @@ -17,10 +17,10 @@ $(BUILD_DIR): $(HOST:%=$(BUILD_DIR)/%.ll):$(BUILD_DIR)/%.ll:%.c $(LLVM_CC) -S -emit-llvm $< -O3 -o $@ -$(HOST:%=$(BUILD_DIR)/%.visc.ll):$(BUILD_DIR)/%.visc.ll:$(BUILD_DIR)/%.ll - $(LLVM_OPT) -load $(LLVM_SRC_ROOT)/Release+Asserts/lib/LLVMGenVISC.so -genvisc -globaldce $< -S -o $@ +$(HOST:%=$(BUILD_DIR)/%.hpvm.ll):$(BUILD_DIR)/%.hpvm.ll:$(BUILD_DIR)/%.ll + $(LLVM_OPT) -load $(LLVM_SRC_ROOT)/Release+Asserts/lib/LLVMGenHPVM.so -genhpvm -globaldce $< -S -o $@ @cat RUN.script $@ > $@.tmp @mv $@.tmp $@ clean : - rm -f $(HOST:%=$(BUILD_DIR)/%.ll) $(HOST:%=$(BUILD_DIR)/%.visc.ll) $(HOST:%=$(BUILD_DIR)/%.visc.ll.kernels.ll) $(HOST:%=$(BUILD_DIR)/%.visc.ll.nvptx.s) $(BUILD_DIR)/DataflowGraph.dot* + rm -f $(HOST:%=$(BUILD_DIR)/%.ll) $(HOST:%=$(BUILD_DIR)/%.hpvm.ll) $(HOST:%=$(BUILD_DIR)/%.hpvm.ll.kernels.ll) $(HOST:%=$(BUILD_DIR)/%.hpvm.ll.nvptx.s) 
$(BUILD_DIR)/DataflowGraph.dot* diff --git a/hpvm/test/CTestSuite/RUN.script b/hpvm/test/CTestSuite/RUN.script index 10bf667818824719af2e041fc6b2dc3e449d9158..23fa1694ebf4b7448c731327b96b949c0509b62e 100644 --- a/hpvm/test/CTestSuite/RUN.script +++ b/hpvm/test/CTestSuite/RUN.script @@ -1,6 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG -o %t.ll -S %s ; RUN: llvm-link %llvm_src/../libclc/built_libs/nvptx--nvidiacl.bc %s.kernels.ll -o %t.ll.kernels.linked.bc ; RUN: clang -O3 -target nvptx %t.ll.kernels.linked.bc -S -o %s.nvptx.s -; RUN: llvm-link %t.ll %llvm_src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: llvm-link %t.ll %llvm_src/projects/hpvm-rt/hpvm-rt.ll -S -o %t.linked.ll ; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin diff --git a/hpvm/test/CTestSuite/gemm.c b/hpvm/test/CTestSuite/gemm.c index d0a69ba25c27fb65ea549023deed2dfb0197b882..eb0a3c5e9204d9621c4a15ae7f07ef5158ac1d07 100644 --- a/hpvm/test/CTestSuite/gemm.c +++ b/hpvm/test/CTestSuite/gemm.c @@ -54,14 +54,14 @@ __attribute__((noinline)) int checkResults(float *A, float *B, float *C) { return 1; // Success } -// Dummy visc node execution call -// void __visc__node(void kernel (float*, float*, float*, unsigned, unsigned), +// Dummy hpvm node execution call +// void __hpvm__node(void kernel (float*, float*, float*, unsigned, unsigned), // int numDims, void* dims, int numInputs, void* inputs, int numOutputs, void* // outputs); void matrixMul(float *A, float *B, float *C, unsigned k, unsigned n) { - __visc__attributes(2, A, B, 1, C); + __hpvm__attributes(2, A, B, 1, C); // printf("Entered function\n"); int tx = get_local_id(0); // 2D Global Thread ID x int ty = get_local_id(1); // 2D Global Thread ID y @@ -130,10 +130,10 @@ int main(int argc, char **argv) { // Compute using OpenCL // matrixMul(h_A, h_B, h_C, WA, WB); - //__visc__node(matrixMul, 2, WB, HA, 3, h_A, 
h_B, h_C, 0); - unsigned graphMM = __visc__node(matrixMul, 1, 2, WB, HA, 8, h_A, bytes_A, h_B, + //__hpvm__node(matrixMul, 2, WB, HA, 3, h_A, h_B, h_C, 0); + unsigned graphMM = __hpvm__node(matrixMul, 1, 2, WB, HA, 8, h_A, bytes_A, h_B, bytes_B, h_C, bytes_C, WA, WB, 0); - __visc__wait(graphMM); + __hpvm__wait(graphMM); if (checkResults(h_A, h_B, h_C)) printf("\nPass!\n"); else diff --git a/hpvm/test/CTestSuite/gemm_2.c b/hpvm/test/CTestSuite/gemm_2.c index bd7ab27fc0160275442d23faf507851b7c2369f7..df4555936316703cfccd4048f2ade4e28592e53a 100644 --- a/hpvm/test/CTestSuite/gemm_2.c +++ b/hpvm/test/CTestSuite/gemm_2.c @@ -54,13 +54,13 @@ __attribute__((noinline)) int checkResults(float *A, float *B, float *C) { return 1; // Success } -// Dummy visc node execution call -// void __visc__node(void kernel (float*, float*, float*, unsigned, unsigned), +// Dummy hpvm node execution call +// void __hpvm__node(void kernel (float*, float*, float*, unsigned, unsigned), // int numDims, void* dims, int numInputs, void* inputs, int numOutputs, void* // outputs); void matrixMul(float *A, float *B, float *C, unsigned k, unsigned n) { - __visc__attributes(2, A, B, 1, C); + __hpvm__attributes(2, A, B, 1, C); // printf("Entered function\n"); int tx = get_global_id(0); // 2D Global Thread ID x @@ -130,11 +130,11 @@ int main(int argc, char **argv) { // Compute using OpenCL // matrixMul(h_A, h_B, h_C, WA, WB); - //__visc__node(matrixMul, 2, WB, HA, 3, h_A, h_B, h_C, 0); + //__hpvm__node(matrixMul, 2, WB, HA, 3, h_A, h_B, h_C, 0); unsigned graphMM = - __visc__node(matrixMul, 2, 2, 16, 16, WB / 16, HA / 16, 8, h_A, bytes_A, + __hpvm__node(matrixMul, 2, 2, 16, 16, WB / 16, HA / 16, 8, h_A, bytes_A, h_B, bytes_B, h_C, bytes_C, WA, WB, 0); - __visc__wait(graphMM); + __hpvm__wait(graphMM); if (checkResults(h_A, h_B, h_C)) printf("\nPass!\n"); else diff --git a/hpvm/test/README.md b/hpvm/test/README.md index 94103affb668afc29d32e52d85d0d60182bd16d8..1cc9abf4f963cffca8d6dbf52e14413172b5a218 
100644 --- a/hpvm/test/README.md +++ b/hpvm/test/README.md @@ -9,11 +9,11 @@ Tests may be built for the cpu or gpu with hpvm. # sgemm example cd parboil/benchmarks/sgemm # HPVM cpu -make TARGET=seq VERSION=visc -make run TARGET=seq VERSION=visc +make TARGET=seq VERSION=hpvm +make run TARGET=seq VERSION=hpvm # HPVM gpu -make TARGET=gpu VERSION=visc -make run TARGET=gpu VERSION=visc +make TARGET=gpu VERSION=hpvm +make run TARGET=gpu VERSION=hpvm ``` ## Cava @@ -27,4 +27,4 @@ make TARGET={seq, gpu} ## Your own project See `template/` for an example Makefile and config. -Include `visc.h` to use HPVM C api functions, found in the `test/include/visc.h`. +Include `hpvm.h` to use HPVM C api functions, found in the `test/include/hpvm.h`. diff --git a/hpvm/test/hpvm-cava/.gitignore b/hpvm/test/hpvm-cava/.gitignore index 2fc1b235647962ac761edda7dfbda4499cbcd4f0..f08b880bf9b4b8171e9fb878bea3a6d266a1f9c0 100644 --- a/hpvm/test/hpvm-cava/.gitignore +++ b/hpvm/test/hpvm-cava/.gitignore @@ -1,5 +1,5 @@ build/ -cava-visc +cava-hpvm Makefile.config example-face/*.bin diff --git a/hpvm/test/hpvm-cava/Makefile b/hpvm/test/hpvm-cava/Makefile index 0054af8c4d9cc39c21b00e73a5b53c8ac2a089b8..dd8e4825c8b72ebf44b1acdbe7db2127987d6684 100644 --- a/hpvm/test/hpvm-cava/Makefile +++ b/hpvm/test/hpvm-cava/Makefile @@ -26,21 +26,21 @@ CURRENT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) INCLUDES += -I$(SRC_DIR) -I$(CAM_PIPE_SRC_DIR) -INCLUDES += -I$(LLVM_SRC_ROOT)/include -I../include -I$(VISC_BUILD_DIR)/include +INCLUDES += -I$(LLVM_SRC_ROOT)/include -I../include -I$(HPVM_BUILD_DIR)/include ifneq ($(CONFUSE_ROOT),) INCLUDES += -I$(CONFUSE_ROOT)/include LFLAGS += -L$(CONFUSE_ROOT)/lib endif -EXE = cava-visc-$(VERSION)-$(TARGET) +EXE = cava-hpvm-$(VERSION)-$(TARGET) LFLAGS += -pthread ## BEGIN HPVM MAKEFILE -LANGUAGE=visc +LANGUAGE=hpvm SRCDIR_OBJS= load_cam_model.ll cam_pipe_utility.ll dma_interface.ll utility.ll OBJS_SRC=src/cam_pipe.c src/pipe_stages.c src/load_cam_model.c 
src/cam_pipe_utility.c src/dma_interface.c src/utility.c -VISC_OBJS=main.visc.ll +HPVM_OBJS=main.hpvm.ll APP = $(EXE) APP_CUDALDFLAGS=-lm -lstdc++ APP_CFLAGS= $(INCLUDES) -DDMA_MODE -DDMA_INTERFACE_V3 @@ -52,23 +52,23 @@ OBJS_CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS) CXXFLAGS = $(APP_CXXFLAGS) $(PLATFORM_CXXFLAGS) LDFLAGS= $(APP_LDFLAGS) $(PLATFORM_LDFLAGS) -VISC_RT_PATH = $(LLVM_BUILD_DIR)/tools/hpvm/projects/visc-rt +HPVM_RT_PATH = $(LLVM_SRC_ROOT)/tools/hpvm/projects/hpvm-rt -VISC_RT_LIB = $(VISC_RT_PATH)/visc-rt.bc +HPVM_RT_LIB = $(HPVM_RT_PATH)/hpvm-rt.bc -TESTGEN_OPTFLAGS = -load LLVMGenVISC.so -genvisc -globaldce +TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce ifeq ($(TARGET),seq) DEVICE = CPU_TARGET - VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG - VISC_OPTFLAGS += -visc-timers-x86 + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG + HPVM_OPTFLAGS += -hpvm-timers-x86 else DEVICE = GPU_TARGET - VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG - VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-ptx + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG + HPVM_OPTFLAGS += -hpvm-timers-x86 -hpvm-timers-ptx endif - TESTGEN_OPTFLAGS += -visc-timers-gen + TESTGEN_OPTFLAGS += -hpvm-timers-gen CFLAGS += -DDEVICE=$(DEVICE) CXXFLAGS += -DDEVICE=$(DEVICE) @@ -79,7 +79,7 @@ INBUILDDIR=$(addprefix $(BUILD_DIR)/,$(1)) .PRECIOUS: $(BUILD_DIR)/%.ll OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS)) -TEST_OBJS = $(call INBUILDDIR,$(VISC_OBJS)) +TEST_OBJS = $(call INBUILDDIR,$(HPVM_OBJS)) KERNEL = $(TEST_OBJS).kernels.ll ifeq ($(TARGET),gpu) @@ -105,11 +105,11 @@ $(KERNEL_OCL) : 
$(KERNEL) $(EXE) : $(HOST_LINKED) $(CXX) -O3 $(LDFLAGS) $< -o $@ -$(HOST_LINKED) : $(HOST) $(OBJS) $(VISC_RT_LIB) +$(HOST_LINKED) : $(HOST) $(OBJS) $(HPVM_RT_LIB) $(LLVM_LINK) $^ -S -o $@ -$(HOST) $(KERNEL): $(BUILD_DIR)/$(VISC_OBJS) - $(OPT) $(VISC_OPTFLAGS) -S $< -o $(HOST) +$(HOST) $(KERNEL): $(BUILD_DIR)/$(HPVM_OBJS) + $(OPT) $(HPVM_OPTFLAGS) -S $< -o $(HOST) $(BUILD_DIR): mkdir -p $(BUILD_DIR) @@ -120,7 +120,7 @@ $(BUILD_DIR)/%.ll : $(SRC_DIR)/%.c $(BUILD_DIR)/main.ll : $(SRC_DIR)/main.c $(CC) $(CFLAGS) -emit-llvm -S -o $@ $< -$(BUILD_DIR)/main.visc.ll : $(BUILD_DIR)/main.ll +$(BUILD_DIR)/main.hpvm.ll : $(BUILD_DIR)/main.ll $(OPT) $(TESTGEN_OPTFLAGS) $< -S -o $@ ## END HPVM MAKEFILE diff --git a/hpvm/test/hpvm-cava/README.md b/hpvm/test/hpvm-cava/README.md index 890b629d172a2f53bf77d6d52bda27637c71afeb..1106c4781b285c47d59548d47e5cd03f09063b28 100644 --- a/hpvm/test/hpvm-cava/README.md +++ b/hpvm/test/hpvm-cava/README.md @@ -12,7 +12,7 @@ See the original camera/vision pipeline repo (repo: `yaoyuannnn/cava`) for detai After building HPVM, the following steps are required to build and run the camera pipeline: 1. Build with `make TARGET=seq` for CPU and `make TARGET=gpu` for gpu. -2. Run with `./cava-visc-<Target> example-tulip-small/raw_tulip-small.bin example-tulip-small/tulip-small`. +2. Run with `./cava-hpvm-<Target> example-tulip-small/raw_tulip-small.bin example-tulip-small/tulip-small`. * `<Target>` can be either `seq` or `gpu` depending on what target is used to build. * This processes the raw image `example-tulip-small/raw_tulip-small.bin`. Note that raw images are different from bitmaps, so you might need to obtain them using special software. * This generates: `tulip-small.bin` and `tulip-small-<stage>.bin` where `<stage>` represents the stage of the pipeline. 
diff --git a/hpvm/test/hpvm-cava/src/cam_pipe.c b/hpvm/test/hpvm-cava/src/cam_pipe.c index 7874ff9d529afebc40d1660637e85b3a1e00f23e..cdeaf393320121706d13d423212896e2551142c8 100644 --- a/hpvm/test/hpvm-cava/src/cam_pipe.c +++ b/hpvm/test/hpvm-cava/src/cam_pipe.c @@ -1,11 +1,11 @@ +#include "cam_pipe_utility.h" +#include "dma_interface.h" +#include "load_cam_model.h" +#include "pipe_stages.h" +#include <assert.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <assert.h> -#include "pipe_stages.h" -#include "load_cam_model.h" -#include "cam_pipe_utility.h" -#include "dma_interface.h" #ifdef DMA_MODE #include "gem5_harness.h" #endif @@ -13,7 +13,7 @@ // FIXME: Include gem5/dma_interface.cc/h separately #ifndef DMA_INTERFACE_V3 #define DMA_INTERFACE_V3 -#endif//DMA_INTERFACE_V3 +#endif // DMA_INTERFACE_V3 /////////////////////////////////////////////////////////////// // Camera Model Parameters @@ -71,7 +71,8 @@ void cam_pipe(uint8_t *host_input, uint8_t *host_result, int row_size, uint8_t *acc_input, *acc_result; float *acc_input_scaled, *acc_result_scaled; float *host_TsTw, *host_ctrl_pts, *host_weights, *host_coefs, *host_tone_map; - float *acc_TsTw, *acc_ctrl_pts, *acc_weights, *acc_coefs, *acc_tone_map, *acc_l2_dist; + float *acc_TsTw, *acc_ctrl_pts, *acc_weights, *acc_coefs, *acc_tone_map, + *acc_l2_dist; strcat(cam_model_path, "cam_models/NikonD7000/"); @@ -84,20 +85,25 @@ void cam_pipe(uint8_t *host_input, uint8_t *host_result, int row_size, host_coefs = get_coefs(cam_model_path, num_ctrl_pts); host_tone_map = get_tone_map(cam_model_path); - acc_input = (uint8_t*) malloc_aligned(sizeof(uint8_t) * row_size * col_size * CHAN_SIZE); - acc_result = (uint8_t*) malloc_aligned(sizeof(uint8_t) * row_size * col_size * CHAN_SIZE); - acc_input_scaled = (float*) malloc_aligned(sizeof(float) * row_size * col_size * CHAN_SIZE); - acc_result_scaled = (float*) malloc_aligned(sizeof(float) * row_size * col_size * CHAN_SIZE); - acc_TsTw = (float*) 
malloc_aligned(sizeof(float) * 9); - acc_ctrl_pts = (float*) malloc_aligned(sizeof(float) * num_ctrl_pts * CHAN_SIZE); - acc_weights = (float*) malloc_aligned(sizeof(float) * num_ctrl_pts * CHAN_SIZE); - acc_coefs = (float*) malloc_aligned(sizeof(float) * 12); - acc_tone_map = (float*) malloc_aligned(sizeof(float) * 256 * CHAN_SIZE); - acc_l2_dist = (float*) malloc_aligned(sizeof(float) * num_ctrl_pts); + acc_input = (uint8_t *)malloc_aligned(sizeof(uint8_t) * row_size * col_size * + CHAN_SIZE); + acc_result = (uint8_t *)malloc_aligned(sizeof(uint8_t) * row_size * col_size * + CHAN_SIZE); + acc_input_scaled = + (float *)malloc_aligned(sizeof(float) * row_size * col_size * CHAN_SIZE); + acc_result_scaled = + (float *)malloc_aligned(sizeof(float) * row_size * col_size * CHAN_SIZE); + acc_TsTw = (float *)malloc_aligned(sizeof(float) * 9); + acc_ctrl_pts = + (float *)malloc_aligned(sizeof(float) * num_ctrl_pts * CHAN_SIZE); + acc_weights = + (float *)malloc_aligned(sizeof(float) * num_ctrl_pts * CHAN_SIZE); + acc_coefs = (float *)malloc_aligned(sizeof(float) * 12); + acc_tone_map = (float *)malloc_aligned(sizeof(float) * 256 * CHAN_SIZE); + acc_l2_dist = (float *)malloc_aligned(sizeof(float) * num_ctrl_pts); // Load camera model parameters for the ISP - MAP_ARRAY_TO_ACCEL(ISP, "host_TsTw", host_TsTw, - sizeof(float) * 9); + MAP_ARRAY_TO_ACCEL(ISP, "host_TsTw", host_TsTw, sizeof(float) * 9); MAP_ARRAY_TO_ACCEL(ISP, "host_ctrl_pts", host_ctrl_pts, sizeof(float) * num_ctrl_pts * CHAN_SIZE); MAP_ARRAY_TO_ACCEL(ISP, "host_weights", host_weights, @@ -136,4 +142,3 @@ void cam_pipe(uint8_t *host_input, uint8_t *host_result, int row_size, free(acc_tone_map); free(acc_l2_dist); } - diff --git a/hpvm/test/hpvm-cava/src/cam_pipe_utility.c b/hpvm/test/hpvm-cava/src/cam_pipe_utility.c index f806e9ee1a2e288fabcb8ad658a47c3919fbb661..864f02d5b28f2c4738279cf66cba5f4312c2a3de 100644 --- a/hpvm/test/hpvm-cava/src/cam_pipe_utility.c +++ b/hpvm/test/hpvm-cava/src/cam_pipe_utility.c @@ -1,6 
+1,6 @@ +#include <assert.h> #include <stdio.h> #include <stdlib.h> -#include <assert.h> #include "cam_pipe_utility.h" //#include "pipe_stages.h" @@ -26,10 +26,11 @@ uint8_t *read_image_from_binary(char *file_path, int *row_size, int *col_size) { return image; } -void write_image_to_binary(char *file_path, uint8_t *image, int row_size, int col_size) { +void write_image_to_binary(char *file_path, uint8_t *image, int row_size, + int col_size) { FILE *fp = fopen(file_path, "w"); - int shape[3] = { row_size, col_size, CHAN_SIZE }; + int shape[3] = {row_size, col_size, CHAN_SIZE}; fwrite(shape, sizeof(int), 3, fp); int size = row_size * col_size * CHAN_SIZE; @@ -40,8 +41,8 @@ void write_image_to_binary(char *file_path, uint8_t *image, int row_size, int co float *transpose_mat(float *inmat, int width, int height) { // Define vectors float *outmat; - int err = - posix_memalign((void **)&outmat, CACHELINE_SIZE, sizeof(float) * height * width); + int err = posix_memalign((void **)&outmat, CACHELINE_SIZE, + sizeof(float) * height * width); assert(err == 0 && "Failed to allocate memory!"); // Transpose the matrix @@ -71,7 +72,7 @@ void convert_chw_to_hwc(uint8_t *input, int row_size, int col_size, uint8_t **result) { if (*result == NULL) { *result = (uint8_t *)malloc_aligned(row_size * col_size * CHAN_SIZE * - sizeof(uint8_t)); + sizeof(uint8_t)); } ARRAY_3D(uint8_t, _input, input, row_size, col_size); ARRAY_3D(uint8_t, _result, *result, col_size, CHAN_SIZE); diff --git a/hpvm/test/hpvm-cava/src/cam_pipe_utility.h b/hpvm/test/hpvm-cava/src/cam_pipe_utility.h index b4fb6cde0c438b23c2b596cf0418953aaedca501..b61b7cc9b52aa59522f93661895fca960b947f17 100644 --- a/hpvm/test/hpvm-cava/src/cam_pipe_utility.h +++ b/hpvm/test/hpvm-cava/src/cam_pipe_utility.h @@ -1,8 +1,8 @@ #ifndef _CAM_PIPE_UTILITY_H_ #define _CAM_PIPE_UTILITY_H_ -#include "utility.h" #include "pipe_stages.h" +#include "utility.h" uint8_t *read_image_from_binary(char *file_path, int *row_size, int *col_size); void 
write_image_to_binary(char *file_path, uint8_t *image, int row_size, diff --git a/hpvm/test/hpvm-cava/src/defs.h b/hpvm/test/hpvm-cava/src/defs.h index ccc8acc857c36fd13115670932a38dc3a406dc29..0fa95ef3d2ea55c67a921e0bc5fc8a6ec6ba949f 100644 --- a/hpvm/test/hpvm-cava/src/defs.h +++ b/hpvm/test/hpvm-cava/src/defs.h @@ -10,46 +10,46 @@ typedef unsigned long uint64_t; // Debugging message macros. #if DEBUG_LEVEL >= 1 - #define INFO_MSG(args...) printf(args) - - #if DEBUG_LEVEL >= 2 - #define PRINT_MSG(args...) printf(args) - #define PRINT_DEBUG(hid, rows, cols, num_cols) \ - print_debug(hid, rows, cols, num_cols) - #define PRINT_DEBUG4D(hid, rows, cols, height) \ - print_debug4d(hid, rows, cols, height) - #define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) \ - print_debug4d_fp16(hid, num, height, rows, cols) - - #if DEBUG_LEVEL >= 3 - #define PRINT_DEBUG_V(hid, rows, cols, num_cols) \ - print_debug(hid, rows, cols, num_cols) - #define PRINT_DEBUG4D_V(hid, rows, cols, height) \ - print_debug4d(hid, rows, cols, height) - #define PRINT_MSG_V(args...) printf(args) - #else - #define PRINT_DEBUG_V(hid, rows, cols, num_cols) - #define PRINT_DEBUG4D_V(hid, rows, cols, height) - #define PRINT_MSG_V(args...) - #endif - #else - #define PRINT_MSG(args...) - #define PRINT_DEBUG(hid, rows, cols, num_cols) - #define PRINT_DEBUG4D(hid, rows, cols, height) - #define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) - #define PRINT_DEBUG_V(hid, rows, cols, height) - #define PRINT_DEBUG4D_V(hid, rows, cols, height) - #define PRINT_MSG_V(args...) - #endif +#define INFO_MSG(args...) printf(args) + +#if DEBUG_LEVEL >= 2 +#define PRINT_MSG(args...) 
printf(args) +#define PRINT_DEBUG(hid, rows, cols, num_cols) \ + print_debug(hid, rows, cols, num_cols) +#define PRINT_DEBUG4D(hid, rows, cols, height) \ + print_debug4d(hid, rows, cols, height) +#define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) \ + print_debug4d_fp16(hid, num, height, rows, cols) + +#if DEBUG_LEVEL >= 3 +#define PRINT_DEBUG_V(hid, rows, cols, num_cols) \ + print_debug(hid, rows, cols, num_cols) +#define PRINT_DEBUG4D_V(hid, rows, cols, height) \ + print_debug4d(hid, rows, cols, height) +#define PRINT_MSG_V(args...) printf(args) #else - #define INFO_MSG(args...) - #define PRINT_DEBUG(hid, rows, cols, num_cols) - #define PRINT_DEBUG4D(hid, rows, cols, height) - #define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) - #define PRINT_MSG(args...) - #define PRINT_DEBUG_V(hid, rows, cols, height) - #define PRINT_DEBUG4D_V(hid, rows, cols, height) - #define PRINT_MSG_V(args...) +#define PRINT_DEBUG_V(hid, rows, cols, num_cols) +#define PRINT_DEBUG4D_V(hid, rows, cols, height) +#define PRINT_MSG_V(args...) +#endif +#else +#define PRINT_MSG(args...) +#define PRINT_DEBUG(hid, rows, cols, num_cols) +#define PRINT_DEBUG4D(hid, rows, cols, height) +#define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) +#define PRINT_DEBUG_V(hid, rows, cols, height) +#define PRINT_DEBUG4D_V(hid, rows, cols, height) +#define PRINT_MSG_V(args...) +#endif +#else +#define INFO_MSG(args...) +#define PRINT_DEBUG(hid, rows, cols, num_cols) +#define PRINT_DEBUG4D(hid, rows, cols, height) +#define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) +#define PRINT_MSG(args...) +#define PRINT_DEBUG_V(hid, rows, cols, height) +#define PRINT_DEBUG4D_V(hid, rows, cols, height) +#define PRINT_MSG_V(args...) 
#endif #define STRING(arg) #arg @@ -72,9 +72,9 @@ typedef unsigned long uint64_t; #define max3(e0, e1, e2) max2(max2(e0, e1), e2) #define max4(e0, e1, e2, e3) max2(max2(e0, e1), max2(e2, e3)) #define max8(e0, e1, e2, e3, e4, e5, e6, e7) \ - max2(max4(e0, e1, e2, e3), max4(e4, e5, e6, e7)) + max2(max4(e0, e1, e2, e3), max4(e4, e5, e6, e7)) #define max9(e0, e1, e2, e3, e4, e5, e6, e7, e8) \ - max2(max8(e0, e1, e2, e3, e4, e5, e6, e7), e8) + max2(max8(e0, e1, e2, e3, e4, e5, e6, e7), e8) #define min2(A, B) (((A) < (B)) ? (A) : (B)) @@ -92,7 +92,8 @@ typedef unsigned long uint64_t; // If GEM5_HARNESS is defined: // // MAP_ARRAY_TO_ACCEL(myReqCode, myArrayName, myArrayPtr, mySize) -// ===> mapArrayToAccelerator(myReqCode, myArrayName, myArrayPtr, mySize) +// ===> mapArrayToAccelerator(myReqCode, myArrayName, myArrayPtr, +// mySize) // // INVOKE_KERNEL(myReqCode, kernelFuncName, args...) // ===> invokeAcceleratorAndBlock(myReqCode) @@ -107,69 +108,69 @@ typedef unsigned long uint64_t; #ifdef GEM5_HARNESS #define MAP_ARRAY_TO_ACCEL(req_code, name, base_addr, size) \ - mapArrayToAccelerator(req_code, name, base_addr, size) + mapArrayToAccelerator(req_code, name, base_addr, size) #define INVOKE_KERNEL(req_code, kernel_ptr, args...) \ - do { \ - UNUSED(kernel_ptr); \ - invokeAcceleratorAndBlock(req_code); \ - } while (0) + do { \ + UNUSED(kernel_ptr); \ + invokeAcceleratorAndBlock(req_code); \ + } while (0) #define INVOKE_KERNEL_NOBLOCK(req_code, finish_flag, kernel_ptr, args...) 
\ - do { \ - UNUSED(kernel_ptr); \ - invokeAcceleratorAndReturn2(req_code, finish_flag); \ - } while (0) + do { \ + UNUSED(kernel_ptr); \ + invokeAcceleratorAndReturn2(req_code, finish_flag); \ + } while (0) #define INVOKE_DMA_READ_TRAFFIC_GEN(start_addr, size) \ - do { \ - invokeAladdinTrafficGenAndBlock(start_addr, size, false, false); \ - } while (0) + do { \ + invokeAladdinTrafficGenAndBlock(start_addr, size, false, false); \ + } while (0) #define INVOKE_DMA_WRITE_TRAFFIC_GEN(start_addr, size) \ - do { \ - invokeAladdinTrafficGenAndBlock(start_addr, size, true, false); \ - } while (0) + do { \ + invokeAladdinTrafficGenAndBlock(start_addr, size, true, false); \ + } while (0) #define INVOKE_ACP_READ_TRAFFIC_GEN(start_addr, size) \ - do { \ - invokeAladdinTrafficGenAndBlock(start_addr, size, false, true); \ - } while (0) + do { \ + invokeAladdinTrafficGenAndBlock(start_addr, size, false, true); \ + } while (0) #define INVOKE_ACP_WRITE_TRAFFIC_GEN(start_addr, size) \ - do { \ - invokeAladdinTrafficGenAndBlock(start_addr, size, true, true); \ - } while (0) + do { \ + invokeAladdinTrafficGenAndBlock(start_addr, size, true, true); \ + } while (0) #else #define MAP_ARRAY_TO_ACCEL(req_code, name, base_addr, size) \ - do { \ - INFO_MSG("Mapping array %s @ %p, size %d.\n", \ - name, (void*)base_addr, (int)(size)); \ - UNUSED(req_code); \ - UNUSED(name); \ - UNUSED(base_addr); \ - UNUSED(size); \ - } while (0) + do { \ + INFO_MSG("Mapping array %s @ %p, size %d.\n", name, (void *)base_addr, \ + (int)(size)); \ + UNUSED(req_code); \ + UNUSED(name); \ + UNUSED(base_addr); \ + UNUSED(size); \ + } while (0) #define INVOKE_KERNEL(req_code, kernel_ptr, args...) kernel_ptr(args) #define INVOKE_KERNEL_NOBLOCK(req_code, finish_flag, kernel_ptr, args...) 
\ - kernel_ptr(args) + kernel_ptr(args) #define INVOKE_DMA_READ_TRAFFIC_GEN(start_addr, size) \ - do { \ - UNUSED(start_addr); \ - UNUSED(size); \ - } while (0) + do { \ + UNUSED(start_addr); \ + UNUSED(size); \ + } while (0) #define INVOKE_DMA_WRITE_TRAFFIC_GEN(start_addr, size) \ - do { \ - UNUSED(start_addr); \ - UNUSED(size); \ - } while (0) + do { \ + UNUSED(start_addr); \ + UNUSED(size); \ + } while (0) #define INVOKE_ACP_READ_TRAFFIC_GEN(start_addr, size) \ - do { \ - UNUSED(start_addr); \ - UNUSED(size); \ - } while (0) + do { \ + UNUSED(start_addr); \ + UNUSED(size); \ + } while (0) #define INVOKE_ACP_WRITE_TRAFFIC_GEN(start_addr, size) \ - do { \ - UNUSED(start_addr); \ - UNUSED(size); \ - } while (0) + do { \ + UNUSED(start_addr); \ + UNUSED(size); \ + } while (0) #endif @@ -177,14 +178,14 @@ typedef unsigned long uint64_t; // // This assumes that the current name of the base pointer is also the name of // the array in the top level function of the dynamic trace. THIS IS VERY -// IMPORTANT - if the argument passed to a top level function has been renamed in -// the function, then this WILL NOT WORK! +// IMPORTANT - if the argument passed to a top level function has been renamed +// in the function, then this WILL NOT WORK! // // MAP_ARRAY(myReqCode, myArray, mySize) // ===> MAP_ARRAY_TO_ACCEL(myReqCode, "myArray", myArray, mySize) #define MAP_ARRAY(req_code, name_and_base_addr, size) \ - MAP_ARRAY_TO_ACCEL( \ - req_code, STRING(name_and_base_addr), name_and_base_addr, size) + MAP_ARRAY_TO_ACCEL(req_code, STRING(name_and_base_addr), name_and_base_addr, \ + size) // Use these convenience macros to cast a raw pointer into a multidimensional // variable-length array, which lets us use [] notation inside of the ugly @@ -202,23 +203,24 @@ typedef unsigned long uint64_t; // // And so on... 
#define ARRAY_1D(TYPE, output_array_name, input_array_name) \ - TYPE* output_array_name = (TYPE*)input_array_name + TYPE *output_array_name = (TYPE *)input_array_name #define ARRAY_2D(TYPE, output_array_name, input_array_name, DIM_1) \ - TYPE(*output_array_name)[DIM_1] = (TYPE(*)[DIM_1])input_array_name + TYPE(*output_array_name)[DIM_1] = (TYPE(*)[DIM_1])input_array_name #define ARRAY_3D(TYPE, output_array_name, input_array_name, DIM_1, DIM_2) \ - TYPE(*output_array_name)[DIM_1][DIM_2] = \ - (TYPE(*)[DIM_1][DIM_2])input_array_name - -#define ARRAY_4D( \ - TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3) \ - TYPE(*output_array_name)[DIM_1][DIM_2][DIM_3] = \ - (TYPE(*)[DIM_1][DIM_2][DIM_3])input_array_name - -#define ARRAY_5D( \ - TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3, DIM_4) \ - TYPE(*output_array_name)[DIM_1][DIM_2][DIM_3][DIM_4] = \ - (TYPE(*)[DIM_1][DIM_2][DIM_3][DIM_4])input_array_name + TYPE(*output_array_name) \ + [DIM_1][DIM_2] = (TYPE(*)[DIM_1][DIM_2])input_array_name + +#define ARRAY_4D(TYPE, output_array_name, input_array_name, DIM_1, DIM_2, \ + DIM_3) \ + TYPE(*output_array_name) \ + [DIM_1][DIM_2][DIM_3] = (TYPE(*)[DIM_1][DIM_2][DIM_3])input_array_name + +#define ARRAY_5D(TYPE, output_array_name, input_array_name, DIM_1, DIM_2, \ + DIM_3, DIM_4) \ + TYPE(*output_array_name) \ + [DIM_1][DIM_2][DIM_3][DIM_4] = \ + (TYPE(*)[DIM_1][DIM_2][DIM_3][DIM_4])input_array_name #endif diff --git a/hpvm/test/hpvm-cava/src/dma_interface.c b/hpvm/test/hpvm-cava/src/dma_interface.c index 81bce54469886153170f994a77250a784cc9b7d7..68698635a4fceb4fe67e323bd0f354bd70bca99d 100644 --- a/hpvm/test/hpvm-cava/src/dma_interface.c +++ b/hpvm/test/hpvm-cava/src/dma_interface.c @@ -1,6 +1,6 @@ +#include "dma_interface.h" #include <assert.h> #include <string.h> -#include "dma_interface.h" // All _dmaImplN functions must be always inlined or we'll get extra functions // in the trace. 
@@ -10,22 +10,22 @@ // Starting with version 3, all versioning will be distinguished by the return // value of the DMA functions. -__attribute__((__always_inline__)) -int _dmaImpl3(void* dst_addr, void* src_addr, size_t size) { +__attribute__((__always_inline__)) int _dmaImpl3(void *dst_addr, void *src_addr, + size_t size) { assert(size > 0); memmove(dst_addr, src_addr, size); return 3; } -int dmaLoad(void* dst_addr, void* src_host_addr, size_t size) { +int dmaLoad(void *dst_addr, void *src_host_addr, size_t size) { return _dmaImpl3(dst_addr, src_host_addr, size); } -int dmaStore(void* dst_host_addr, void* src_addr, size_t size) { +int dmaStore(void *dst_host_addr, void *src_addr, size_t size) { return _dmaImpl3(dst_host_addr, src_addr, size); } -int setReadyBits(void* start_addr, size_t size, unsigned value) { +int setReadyBits(void *start_addr, size_t size, unsigned value) { asm(""); return 0; } @@ -35,39 +35,37 @@ int setReadyBits(void* start_addr, size_t size, unsigned value) { // With version 2 and earlier, we return (void*)NULL and use the number of // function arguments to distinguish the DMA functions. 
-__attribute__((__always_inline__)) -void* _dmaImpl2(void* base_addr, size_t src_off, size_t dst_off, size_t size) { +__attribute__((__always_inline__)) void * +_dmaImpl2(void *base_addr, size_t src_off, size_t dst_off, size_t size) { assert(size > 0); memmove(base_addr + dst_off, base_addr + src_off, size); return NULL; } -void* dmaLoad(void* base_addr, size_t src_off, size_t dst_off, size_t size) { +void *dmaLoad(void *base_addr, size_t src_off, size_t dst_off, size_t size) { return _dmaImpl2(base_addr, src_off, dst_off, size); } -void* dmaStore(void* base_addr, size_t src_off, size_t dst_off, size_t size) { +void *dmaStore(void *base_addr, size_t src_off, size_t dst_off, size_t size) { return _dmaImpl2(base_addr, src_off, dst_off, size); } #else -__attribute__((__always_inline__)) -void* _dmaImpl1(void* base_addr, size_t offset, size_t size) { +__attribute__((__always_inline__)) void *_dmaImpl1(void *base_addr, + size_t offset, size_t size) { assert(size > 0); asm(""); return NULL; } -void* dmaLoad(void* addr, size_t offset, size_t size) { +void *dmaLoad(void *addr, size_t offset, size_t size) { return _dmaImpl1(addr, offset, size); } -void* dmaStore(void* addr, size_t offset, size_t size) { +void *dmaStore(void *addr, size_t offset, size_t size) { return _dmaImpl1(addr, offset, size); } #endif -void dmaFence() { - asm(""); -} +void dmaFence() { asm(""); } diff --git a/hpvm/test/hpvm-cava/src/dma_interface.h b/hpvm/test/hpvm-cava/src/dma_interface.h index f23234eede4df99db84b144646530dfe240c6e62..771ece523824cff5923581aca671ab7d26fae706 100644 --- a/hpvm/test/hpvm-cava/src/dma_interface.h +++ b/hpvm/test/hpvm-cava/src/dma_interface.h @@ -10,12 +10,12 @@ // Version 3 of the DMA interface enables memcpy operations from arbitrary // source and destination addresses. 
-int dmaLoad(void* dst_addr, void* src_host_addr, size_t size); -int dmaStore(void* dst_host_addr, void* src_addr, size_t size); +int dmaLoad(void *dst_addr, void *src_host_addr, size_t size); +int dmaStore(void *dst_host_addr, void *src_addr, size_t size); // The user can explicitly toggle the state of ready bits, if ready mode is // enabled. This requires support from DMA v3. -int setReadyBits(void* start_addr, size_t size, unsigned value); +int setReadyBits(void *start_addr, size_t size, unsigned value); #elif defined(DMA_INTERFACE_V2) @@ -26,17 +26,18 @@ int setReadyBits(void* start_addr, size_t size, unsigned value); // actually copied from source to destination (the memory copy will not show up // in the trace). -void* dmaLoad(void* base_addr, size_t src_off, size_t dst_off, size_t size); -void* dmaStore(void* base_addr, size_t src_off, size_t dst_off, size_t size); +void *dmaLoad(void *base_addr, size_t src_off, size_t dst_off, size_t size); +void *dmaStore(void *base_addr, size_t src_off, size_t dst_off, size_t size); #else #warning "DMA interface v1 is deprecated!" -// Version 1 of the DMA interface is now deprecated and will be removed entirely. +// Version 1 of the DMA interface is now deprecated and will be removed +// entirely. 
-void* dmaLoad(void* addr, size_t offset, size_t size); -void* dmaStore(void* addr, size_t offset, size_t size); +void *dmaLoad(void *addr, size_t offset, size_t size); +void *dmaStore(void *addr, size_t offset, size_t size); #endif void dmaFence(); diff --git a/hpvm/test/hpvm-cava/src/load_cam_model.c b/hpvm/test/hpvm-cava/src/load_cam_model.c index 124fe0b7d175c2655feac562ecd6e2a5b73cc96a..dffb12b04b1e8f1cc56060737527a33de074d4a5 100644 --- a/hpvm/test/hpvm-cava/src/load_cam_model.c +++ b/hpvm/test/hpvm-cava/src/load_cam_model.c @@ -1,13 +1,14 @@ +#include <assert.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <assert.h> + #include "utility.h" #include "pipe_stages.h" #include "load_cam_model.h" // Get color space transform -float* get_Ts(char* cam_model_path) { +float *get_Ts(char *cam_model_path) { float *Ts; int err = posix_memalign((void **)&Ts, CACHELINE_SIZE, sizeof(float) * 9); assert(err == 0 && "Failed to allocate memory!"); @@ -32,7 +33,7 @@ float* get_Ts(char* cam_model_path) { str = strtok(line, " \n"); int i = 0; while (str != NULL) { - line_data[i] = atof(str); + line_data[i] = atof(str); str = strtok(NULL, " \n"); i++; } @@ -50,7 +51,7 @@ float* get_Ts(char* cam_model_path) { } // Get white balance transform -float* get_Tw(char* cam_model_path, int wb_index) { +float *get_Tw(char *cam_model_path, int wb_index) { float *Tw; int err = posix_memalign((void **)&Tw, CACHELINE_SIZE, sizeof(float) * 9); assert(err == 0 && "Failed to allocate memory!"); @@ -62,7 +63,7 @@ float* get_Tw(char* cam_model_path, int wb_index) { // Calculate base for the white balance transform selected // For more details see the camera model readme - int wb_base = 8 + 5*(wb_index-1); + int wb_base = 8 + 5 * (wb_index - 1); // Open file for reading // Open file for reading @@ -81,15 +82,15 @@ float* get_Tw(char* cam_model_path, int wb_index) { str = strtok(line, " \n"); int i = 0; while (str != NULL) { - line_data[i] = atof(str); + line_data[i] = 
atof(str); str = strtok(NULL, " \n"); i++; } if (line_idx == wb_base) { // Convert the white balance vector into a diagaonal matrix - for (int i=0; i<3; i++) { - for (int j=0; j<3; j++) { + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { if (i == j) { Tw[i * 3 + j] = line_data[i]; } else { @@ -105,9 +106,8 @@ float* get_Tw(char* cam_model_path, int wb_index) { return Tw; } - // Get combined transforms for checking -float* get_TsTw(char* cam_model_path, int wb_index) { +float *get_TsTw(char *cam_model_path, int wb_index) { float *TsTw; int err = posix_memalign((void **)&TsTw, CACHELINE_SIZE, sizeof(float) * 9); assert(err == 0 && "Failed to allocate memory!"); @@ -119,7 +119,7 @@ float* get_TsTw(char* cam_model_path, int wb_index) { // Calculate base for the white balance transform selected // For more details see the camera model readme - int wb_base = 5 + 5*(wb_index-1); + int wb_base = 5 + 5 * (wb_index - 1); // Open file for reading char file_name[] = "raw2jpg_transform.txt"; @@ -137,7 +137,7 @@ float* get_TsTw(char* cam_model_path, int wb_index) { str = strtok(line, " \n"); int i = 0; while (str != NULL) { - line_data[i] = atof(str); + line_data[i] = atof(str); str = strtok(NULL, " \n"); i++; } @@ -155,7 +155,7 @@ float* get_TsTw(char* cam_model_path, int wb_index) { } // Get control points -float* get_ctrl_pts(char* cam_model_path, int num_cntrl_pts) { +float *get_ctrl_pts(char *cam_model_path, int num_cntrl_pts) { float *ctrl_pnts; int err = posix_memalign((void **)&ctrl_pnts, CACHELINE_SIZE, sizeof(float) * num_cntrl_pts * 3); @@ -200,7 +200,7 @@ float* get_ctrl_pts(char* cam_model_path, int num_cntrl_pts) { } // Get weights -float* get_weights(char* cam_model_path, int num_cntrl_pts) { +float *get_weights(char *cam_model_path, int num_cntrl_pts) { float *weights; int err = posix_memalign((void **)&weights, CACHELINE_SIZE, sizeof(float) * num_cntrl_pts * 3); @@ -245,7 +245,7 @@ float* get_weights(char* cam_model_path, int num_cntrl_pts) { } // 
Get coeficients -float* get_coefs(char* cam_model_path, int num_cntrl_pts) { +float *get_coefs(char *cam_model_path, int num_cntrl_pts) { float *coefs; int err = posix_memalign((void **)&coefs, CACHELINE_SIZE, sizeof(float) * 12); assert(err == 0 && "Failed to allocate memory!"); @@ -288,9 +288,8 @@ float* get_coefs(char* cam_model_path, int num_cntrl_pts) { return coefs; } - // Get tone mapping table -float* get_tone_map(char* cam_model_path) { +float *get_tone_map(char *cam_model_path) { float *tone_map; int err = posix_memalign((void **)&tone_map, CACHELINE_SIZE, sizeof(float) * 256 * CHAN_SIZE); diff --git a/hpvm/test/hpvm-cava/src/main.c b/hpvm/test/hpvm-cava/src/main.c index e43bbb4f25c4c97c9907ebae37251c854860c3b5..d3834165a86ba114ef4b2369af980b02dbfb62c1 100644 --- a/hpvm/test/hpvm-cava/src/main.c +++ b/hpvm/test/hpvm-cava/src/main.c @@ -1,16 +1,16 @@ +#include "utility.h" #include <argp.h> +#include <assert.h> +#include <math.h> #include <stdio.h> #include <stdlib.h> -#include <assert.h> #include <string.h> -#include <math.h> -#include "utility.h" #include "cam_pipe_utility.h" -#include "pipe_stages.h" #include "load_cam_model.h" +#include "pipe_stages.h" -#include "visc.h" +#include "hpvm.h" int NUM_TEST_CASES; int NUM_CLASSES; @@ -20,117 +20,129 @@ int NUM_WORKER_THREADS; // Type of struct that is used to pass arguments to the HPVM dataflow graph // using the hpvm launch operation typedef struct __attribute__((__packed__)) { - uint8_t *input; size_t bytes_input; - uint8_t *result; size_t bytes_result; - float *input_scaled; size_t bytes_input_scaled; - float *result_scaled; size_t bytes_result_scaled; - float *demosaic_out; size_t bytes_demosaic_out; - float *denoise_out; size_t bytes_denoise_out; - float *transform_out; size_t bytes_transform_out; - float *gamut_out;size_t bytes_gamut_out; - float *TsTw; size_t bytes_TsTw; - float *ctrl_pts; size_t bytes_ctrl_pts; - float *weights; size_t bytes_weights; - float*coefs; size_t bytes_coefs; - float 
*l2_dist; size_t bytes_l2_dist; - float *tone_map; size_t bytes_tone_map; - size_t row_size; size_t col_size; -} -RootIn; + uint8_t *input; + size_t bytes_input; + uint8_t *result; + size_t bytes_result; + float *input_scaled; + size_t bytes_input_scaled; + float *result_scaled; + size_t bytes_result_scaled; + float *demosaic_out; + size_t bytes_demosaic_out; + float *denoise_out; + size_t bytes_denoise_out; + float *transform_out; + size_t bytes_transform_out; + float *gamut_out; + size_t bytes_gamut_out; + float *TsTw; + size_t bytes_TsTw; + float *ctrl_pts; + size_t bytes_ctrl_pts; + float *weights; + size_t bytes_weights; + float *coefs; + size_t bytes_coefs; + float *l2_dist; + size_t bytes_l2_dist; + float *tone_map; + size_t bytes_tone_map; + size_t row_size; + size_t col_size; +} RootIn; typedef enum _argnum { - RAW_IMAGE_BIN, - OUTPUT_IMAGE_BIN, - NUM_REQUIRED_ARGS, - DATA_FILE = NUM_REQUIRED_ARGS, - NUM_ARGS, + RAW_IMAGE_BIN, + OUTPUT_IMAGE_BIN, + NUM_REQUIRED_ARGS, + DATA_FILE = NUM_REQUIRED_ARGS, + NUM_ARGS, } argnum; typedef struct _arguments { - char* args[NUM_ARGS]; - int num_inputs; - int num_threads; + char *args[NUM_ARGS]; + int num_inputs; + int num_threads; } arguments; static char prog_doc[] = "\nCamera pipeline on gem5-Aladdin.\n"; static char args_doc[] = "path/to/raw-image-binary path/to/output-image-binary"; static struct argp_option options[] = { - { "num-inputs", 'n', "N", 0, "Number of input images" }, { 0 }, - { "data-file", 'f', "F", 0, - "File to read data and weights from (if data-init-mode == READ_FILE or " - "save-params is true). *.txt files are decoded as text files, while " - "*.bin files are decoded as binary files." }, + {"num-inputs", 'n', "N", 0, "Number of input images"}, + {0}, + {"data-file", 'f', "F", 0, + "File to read data and weights from (if data-init-mode == READ_FILE or " + "save-params is true). 
*.txt files are decoded as text files, while " + "*.bin files are decoded as binary files."}, }; -static error_t parse_opt(int key, char* arg, struct argp_state* state) { - arguments* args = (arguments*)(state->input); - switch (key) { - case 'n': { - args->num_inputs = strtol(arg, NULL, 10); - break; - } - case 'f': { - args->args[DATA_FILE] = arg; - break; - } - case 't': { - args->num_threads = strtol(arg, NULL, 10); - break; - } - case ARGP_KEY_ARG: { - if (state->arg_num >= NUM_REQUIRED_ARGS) - argp_usage(state); - args->args[state->arg_num] = arg; - break; - } - case ARGP_KEY_END: { - if (state->arg_num < NUM_REQUIRED_ARGS) { - fprintf(stderr, - "Not enough arguments! Got %d, require %d.\n", - state->arg_num, - NUM_REQUIRED_ARGS); - argp_usage(state); - } - break; - } - default: - return ARGP_ERR_UNKNOWN; +static error_t parse_opt(int key, char *arg, struct argp_state *state) { + arguments *args = (arguments *)(state->input); + switch (key) { + case 'n': { + args->num_inputs = strtol(arg, NULL, 10); + break; + } + case 'f': { + args->args[DATA_FILE] = arg; + break; + } + case 't': { + args->num_threads = strtol(arg, NULL, 10); + break; + } + case ARGP_KEY_ARG: { + if (state->arg_num >= NUM_REQUIRED_ARGS) + argp_usage(state); + args->args[state->arg_num] = arg; + break; + } + case ARGP_KEY_END: { + if (state->arg_num < NUM_REQUIRED_ARGS) { + fprintf(stderr, "Not enough arguments! 
Got %d, require %d.\n", + state->arg_num, NUM_REQUIRED_ARGS); + argp_usage(state); } - return 0; + break; + } + default: + return ARGP_ERR_UNKNOWN; + } + return 0; } -void set_default_args(arguments* args) { - args->num_inputs = 1; - args->num_threads = 0; - for (int i = 0; i < NUM_ARGS; i++) { - args->args[i] = NULL; - } +void set_default_args(arguments *args) { + args->num_inputs = 1; + args->num_threads = 0; + for (int i = 0; i < NUM_ARGS; i++) { + args->args[i] = NULL; + } } -static struct argp parser = { options, parse_opt, args_doc, prog_doc }; +static struct argp parser = {options, parse_opt, args_doc, prog_doc}; // Helper function for printing intermediate results -void descale_cpu(float *input, size_t bytes_input, - uint8_t *output, size_t bytes_result, - size_t row_size, size_t col_size) { - +void descale_cpu(float *input, size_t bytes_input, uint8_t *output, + size_t bytes_result, size_t row_size, size_t col_size) { + for (int chan = 0; chan < CHAN_SIZE; chan++) for (int row = 0; row < row_size; row++) for (int col = 0; col < col_size; col++) { - int index = (chan*row_size + row) * col_size + col; + int index = (chan * row_size + row) * col_size + col; output[index] = min(max(input[index] * 255, 0), 255); } } static void sort(float arr[], int n) { - int i, j; - for (i = 0; i < n - 1; i++) - for (j = 0; j < n - i - 1; j++) - if (arr[j] > arr[j + 1]) { - float temp = arr[j]; - arr[j] = arr[j + 1]; - arr[j + 1] = temp; - } + int i, j; + for (i = 0; i < n - 1; i++) + for (j = 0; j < n - i - 1; j++) + if (arr[j] > arr[j + 1]) { + float temp = arr[j]; + arr[j] = arr[j + 1]; + arr[j + 1] = temp; + } } /**************************************************************/ @@ -140,256 +152,259 @@ static void sort(float arr[], int n) { // In this benchmark, no use of HPVM query intrinsics in the leaf node functions // Leaf HPVM node function for scale -void scale_fxp(uint8_t *input, size_t bytes_input, - float *output, size_t bytes_output, - size_t row_size, size_t 
col_size) { +void scale_fxp(uint8_t *input, size_t bytes_input, float *output, + size_t bytes_output, size_t row_size, size_t col_size) { - //Specifies compilation target for current node - __visc__hint(CPU_TARGET); + // Specifies compilation target for current node + __hpvm__hint(CPU_TARGET); // Specifies pointer arguments that will be used as "in" and "out" arguments // - count of "in" arguments // - list of "in" argument , and similar for "out" - __visc__attributes(2, input, output, 1, output); - void* thisNode = __visc__getNode(); - int row = __visc__getNodeInstanceID_x(thisNode); + __hpvm__attributes(2, input, output, 1, output); + void *thisNode = __hpvm__getNode(); + int row = __hpvm__getNodeInstanceID_x(thisNode); for (int chan = 0; chan < CHAN_SIZE; chan++) -// for (int row = 0; row < row_size; row++) - for (int col = 0; col < col_size; col++){ - int index = (chan*row_size + row) * col_size + col; - output[index] = input[index] * 1.0 / 255; - } - __visc__return(1, bytes_output); + // for (int row = 0; row < row_size; row++) + for (int col = 0; col < col_size; col++) { + int index = (chan * row_size + row) * col_size + col; + output[index] = input[index] * 1.0 / 255; + } + __hpvm__return(1, bytes_output); } // Leaf HPVM node function for descale -void descale_fxp(float *input, size_t bytes_input, - uint8_t *output, size_t bytes_result, - size_t row_size, size_t col_size) { - __visc__hint(CPU_TARGET); - __visc__attributes(2, input, output, 1, output); - +void descale_fxp(float *input, size_t bytes_input, uint8_t *output, + size_t bytes_result, size_t row_size, size_t col_size) { + __hpvm__hint(CPU_TARGET); + __hpvm__attributes(2, input, output, 1, output); + for (int chan = 0; chan < CHAN_SIZE; chan++) for (int row = 0; row < row_size; row++) for (int col = 0; col < col_size; col++) { - int index = (chan*row_size + row) * col_size + col; + int index = (chan * row_size + row) * col_size + col; output[index] = min(max(input[index] * 255, 0), 255); } - 
__visc__return(1, bytes_result); + __hpvm__return(1, bytes_result); } // Leaf HPVM node function for demosaicing -void demosaic_fxp(float *input, size_t bytes_input, - float *result, size_t bytes_result, - size_t row_size, size_t col_size) { - __visc__hint(DEVICE); - __visc__attributes(2, input, result, 1, result); - - void* thisNode = __visc__getNode(); - int row = __visc__getNodeInstanceID_x(thisNode); -// for (int row = 1; row < row_size - 1; row++) - for (int col = 1; col < col_size - 1; col++) { - int index_0 = (0 * row_size + row) * col_size + col; - int index_1 = (1 * row_size + row) * col_size + col; - int index_2 = (2 * row_size + row) * col_size + col; - if (row % 2 == 0 && col % 2 == 0) { - // Green pixel - // Getting the R values - float R1 = input[index_0 - 1]; - float R2 = input[index_0 + 1]; - // Getting the B values - float B1 = input[index_2 - col_size]; - float B2 = input[index_2 + col_size]; - // R - result[index_0] = (R1 + R2) / 2; - // G - result[index_1] = input[index_1] * 2; - // B - result[index_2] = (B1 + B2) / 2; - } else if (row % 2 == 0 && col % 2 == 1) { - // Red pixel - // Getting the G values - float G1 = input[index_1 - col_size]; - float G2 = input[index_1 + col_size]; - float G3 = input[index_1 - 1]; - float G4 = input[index_1 + 1]; - // Getting the B values - float B1 = input[index_2 - col_size - 1]; - float B2 = input[index_2 - col_size + 1]; - float B3 = input[index_2 + col_size - 1]; - float B4 = input[index_2 + col_size + 1]; - // R - result[index_0] = input[index_0]; - // G - result[index_1] = (G1 + G2 + G3 + G4) / 2; - // B (center pixel) - result[index_2] = (B1 + B2 + B3 + B4) / 4; - } else if (row % 2 == 1 && col % 2 == 0) { - // Blue pixel - // Getting the R values - float R1 = input[index_0 - col_size - 1]; - float R2 = input[index_0 + col_size - 1]; - float R3 = input[index_0 - col_size + 1]; - float R4 = input[index_0 + col_size + 1]; - // Getting the G values - float G1 = input[index_1 - col_size]; - float G2 = 
input[index_1 + col_size]; - float G3 = input[index_1 - 1]; - float G4 = input[index_1 + 1]; - // R - result[index_0] = (R1 + R2 + R3 + R4) / 4; - // G - result[index_1] = (G1 + G2 + G3 + G4) / 2; - // B - result[index_2] = input[index_2]; - } else { - // Bottom Green pixel - // Getting the R values - float R1 = input[index_0 - col_size]; - float R2 = input[index_0 + col_size]; - // Getting the B values - float B1 = input[index_2 - 1]; - float B2 = input[index_2 + 1]; - // R - result[index_0] = (R1 + R2) / 2; - // G - result[index_1] = input[index_1] * 2; - // B - result[index_2] = (B1 + B2) / 2; - } - } - __visc__return(1, bytes_result); +void demosaic_fxp(float *input, size_t bytes_input, float *result, + size_t bytes_result, size_t row_size, size_t col_size) { + __hpvm__hint(DEVICE); + __hpvm__attributes(2, input, result, 1, result); + + void *thisNode = __hpvm__getNode(); + int row = __hpvm__getNodeInstanceID_x(thisNode); + // for (int row = 1; row < row_size - 1; row++) + for (int col = 1; col < col_size - 1; col++) { + int index_0 = (0 * row_size + row) * col_size + col; + int index_1 = (1 * row_size + row) * col_size + col; + int index_2 = (2 * row_size + row) * col_size + col; + if (row % 2 == 0 && col % 2 == 0) { + // Green pixel + // Getting the R values + float R1 = input[index_0 - 1]; + float R2 = input[index_0 + 1]; + // Getting the B values + float B1 = input[index_2 - col_size]; + float B2 = input[index_2 + col_size]; + // R + result[index_0] = (R1 + R2) / 2; + // G + result[index_1] = input[index_1] * 2; + // B + result[index_2] = (B1 + B2) / 2; + } else if (row % 2 == 0 && col % 2 == 1) { + // Red pixel + // Getting the G values + float G1 = input[index_1 - col_size]; + float G2 = input[index_1 + col_size]; + float G3 = input[index_1 - 1]; + float G4 = input[index_1 + 1]; + // Getting the B values + float B1 = input[index_2 - col_size - 1]; + float B2 = input[index_2 - col_size + 1]; + float B3 = input[index_2 + col_size - 1]; + float B4 = 
input[index_2 + col_size + 1]; + // R + result[index_0] = input[index_0]; + // G + result[index_1] = (G1 + G2 + G3 + G4) / 2; + // B (center pixel) + result[index_2] = (B1 + B2 + B3 + B4) / 4; + } else if (row % 2 == 1 && col % 2 == 0) { + // Blue pixel + // Getting the R values + float R1 = input[index_0 - col_size - 1]; + float R2 = input[index_0 + col_size - 1]; + float R3 = input[index_0 - col_size + 1]; + float R4 = input[index_0 + col_size + 1]; + // Getting the G values + float G1 = input[index_1 - col_size]; + float G2 = input[index_1 + col_size]; + float G3 = input[index_1 - 1]; + float G4 = input[index_1 + 1]; + // R + result[index_0] = (R1 + R2 + R3 + R4) / 4; + // G + result[index_1] = (G1 + G2 + G3 + G4) / 2; + // B + result[index_2] = input[index_2]; + } else { + // Bottom Green pixel + // Getting the R values + float R1 = input[index_0 - col_size]; + float R2 = input[index_0 + col_size]; + // Getting the B values + float B1 = input[index_2 - 1]; + float B2 = input[index_2 + 1]; + // R + result[index_0] = (R1 + R2) / 2; + // G + result[index_1] = input[index_1] * 2; + // B + result[index_2] = (B1 + B2) / 2; + } + } + __hpvm__return(1, bytes_result); } // Leaf HPVM node function for denoise -void denoise_fxp(float *input, size_t bytes_input, - float *result, size_t bytes_result, - size_t row_size, size_t col_size) { - __visc__hint(CPU_TARGET); - __visc__attributes(2, input, result, 1, result); - - void* thisNode = __visc__getNode(); - int row = __visc__getNodeInstanceID_x(thisNode); +void denoise_fxp(float *input, size_t bytes_input, float *result, + size_t bytes_result, size_t row_size, size_t col_size) { + __hpvm__hint(CPU_TARGET); + __hpvm__attributes(2, input, result, 1, result); + + void *thisNode = __hpvm__getNode(); + int row = __hpvm__getNodeInstanceID_x(thisNode); for (int chan = 0; chan < CHAN_SIZE; chan++) -// for (int row = 0; row < row_size; row++) - for (int col = 0; col < col_size; col++) - if (row >= 1 && row < row_size - 1 && col >= 1 
&& col < col_size - 1) { - float filter[9]; - for (int i = -1; i < 2; i++) - for (int j = -1; j < 2; j++) { - int index = ((i+row) - row + 1) * 3 + (j+col) - col + 1; - filter[index] = input[(chan * row_size + (i + row)) * col_size + (j + col)]; - } - sort(filter, 9); - result[(chan * row_size + row) * col_size + col] = filter[4]; - } else { - result[(chan * row_size + row) * col_size + col] = input[(chan * row_size + row) * col_size + col]; - } - __visc__return(1, bytes_result); + // for (int row = 0; row < row_size; row++) + for (int col = 0; col < col_size; col++) + if (row >= 1 && row < row_size - 1 && col >= 1 && col < col_size - 1) { + float filter[9]; + for (int i = -1; i < 2; i++) + for (int j = -1; j < 2; j++) { + int index = ((i + row) - row + 1) * 3 + (j + col) - col + 1; + filter[index] = + input[(chan * row_size + (i + row)) * col_size + (j + col)]; + } + sort(filter, 9); + result[(chan * row_size + row) * col_size + col] = filter[4]; + } else { + result[(chan * row_size + row) * col_size + col] = + input[(chan * row_size + row) * col_size + col]; + } + __hpvm__return(1, bytes_result); } // Leaf HPVM node function, for color map and white balance transform -void transform_fxp(float *input, size_t bytes_input, - float *result, size_t bytes_result, - float *TsTw_tran, size_t bytes_TsTw, +void transform_fxp(float *input, size_t bytes_input, float *result, + size_t bytes_result, float *TsTw_tran, size_t bytes_TsTw, size_t row_size, size_t col_size) { - __visc__hint(DEVICE); - __visc__attributes(3, input, result, TsTw_tran, 1, result); - - void* thisNode = __visc__getNode(); - int row = __visc__getNodeInstanceID_x(thisNode); + __hpvm__hint(DEVICE); + __hpvm__attributes(3, input, result, TsTw_tran, 1, result); + + void *thisNode = __hpvm__getNode(); + int row = __hpvm__getNodeInstanceID_x(thisNode); for (int chan = 0; chan < CHAN_SIZE; chan++) -// for (int row = 0; row < row_size; row++) - for (int col = 0; col < col_size; col++) { - int index = (chan * 
row_size + row) * col_size + col; - int index_0 = (0 * row_size + row) * col_size + col; - int index_1 = (1 * row_size + row) * col_size + col; - int index_2 = (2 * row_size + row) * col_size + col; - int index_2d_0 = 0 * CHAN_SIZE + chan; - int index_2d_1 = 1 * CHAN_SIZE + chan; - int index_2d_2 = 2 * CHAN_SIZE + chan; - result[index] = - max(input[index_0] * TsTw_tran[index_2d_0] + - input[index_1] * TsTw_tran[index_2d_1] + - input[index_2] * TsTw_tran[index_2d_2], - 0); - } - __visc__return(1, bytes_result); + // for (int row = 0; row < row_size; row++) + for (int col = 0; col < col_size; col++) { + int index = (chan * row_size + row) * col_size + col; + int index_0 = (0 * row_size + row) * col_size + col; + int index_1 = (1 * row_size + row) * col_size + col; + int index_2 = (2 * row_size + row) * col_size + col; + int index_2d_0 = 0 * CHAN_SIZE + chan; + int index_2d_1 = 1 * CHAN_SIZE + chan; + int index_2d_2 = 2 * CHAN_SIZE + chan; + result[index] = max(input[index_0] * TsTw_tran[index_2d_0] + + input[index_1] * TsTw_tran[index_2d_1] + + input[index_2] * TsTw_tran[index_2d_2], + 0); + } + __hpvm__return(1, bytes_result); } // Leaf HPVM node function, for gamut mapping -void gamut_map_fxp(float *input, size_t bytes_input, - float *result, size_t bytes_result, - float *ctrl_pts, size_t bytes_ctrl_pts, - float *weights, size_t bytes_weights, - float *coefs, size_t bytes_coefs, - float *l2_dist, size_t bytes_l2_dist, +void gamut_map_fxp(float *input, size_t bytes_input, float *result, + size_t bytes_result, float *ctrl_pts, size_t bytes_ctrl_pts, + float *weights, size_t bytes_weights, float *coefs, + size_t bytes_coefs, float *l2_dist, size_t bytes_l2_dist, size_t row_size, size_t col_size) { - __visc__hint(CPU_TARGET); - __visc__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 2, result, l2_dist); - - // First, get the L2 norm from every pixel to the control points, - // Then, sum it and weight it. Finally, add the bias. 
- void* thisNode = __visc__getNode(); - int row = __visc__getNodeInstanceID_x(thisNode); -// for (int row = 0; row < row_size; row++) - for (int col = 0; col < col_size; col++) { - float chan_val_0 = 0.0; - float chan_val_1 = 0.0; - float chan_val_2 = 0.0; - for (int cp = 0; cp < 3702; cp++) { - int index_0 = (0 * row_size + row) * col_size + col; - int index_1 = (1 * row_size + row) * col_size + col; - int index_2 = (2 * row_size + row) * col_size + col; - float val1 = (input[index_0] - ctrl_pts[cp * 3 + 0]); - float val2 = (input[index_0] - ctrl_pts[cp * 3 + 0]); - float val3 = (input[index_1] - ctrl_pts[cp * 3 + 1]); - float val4 = (input[index_1] - ctrl_pts[cp * 3 + 1]); - float val5 = (input[index_2] - ctrl_pts[cp * 3 + 2]); - float val6 = (input[index_2] - ctrl_pts[cp * 3 + 2]); - float val = val1 * val2 + val3 * val4 + val5 * val6; - float sqrt_val = sqrt(val); - chan_val_0 += sqrt_val * weights[cp * CHAN_SIZE + 0]; - chan_val_1 += sqrt_val * weights[cp * CHAN_SIZE + 1]; - chan_val_2 += sqrt_val * weights[cp * CHAN_SIZE + 2]; - } - chan_val_0 += coefs[0 * CHAN_SIZE + 0] + - coefs[1 * CHAN_SIZE + 0] * input[(0 * row_size + row) * col_size + col] + - coefs[2 * CHAN_SIZE + 0] * input[(1 * row_size + row) * col_size + col] + - coefs[3 * CHAN_SIZE + 0] * input[(2 * row_size + row) * col_size + col]; - chan_val_1 += coefs[0 * CHAN_SIZE + 1] + - coefs[1 * CHAN_SIZE + 1] * input[(0 * row_size + row) * col_size + col] + - coefs[2 * CHAN_SIZE + 1] * input[(1 * row_size + row) * col_size + col] + - coefs[3 * CHAN_SIZE + 1] * input[(2 * row_size + row) * col_size + col]; - chan_val_2 += coefs[0 * CHAN_SIZE + 2] + - coefs[1 * CHAN_SIZE + 2] * input[(0 * row_size + row) * col_size + col] + - coefs[2 * CHAN_SIZE + 2] * input[(1 * row_size + row) * col_size + col] + - coefs[3 * CHAN_SIZE + 2] * input[(2 * row_size + row) * col_size + col]; - result[(0 * row_size + row) * col_size + col] = max(chan_val_0, 0); - result[(1 * row_size + row) * col_size + col] = max(chan_val_1, 
0); - result[(2 * row_size + row) * col_size + col] = max(chan_val_2, 0); + __hpvm__hint(CPU_TARGET); + __hpvm__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 2, + result, l2_dist); + + // First, get the L2 norm from every pixel to the control points, + // Then, sum it and weight it. Finally, add the bias. + void *thisNode = __hpvm__getNode(); + int row = __hpvm__getNodeInstanceID_x(thisNode); + // for (int row = 0; row < row_size; row++) + for (int col = 0; col < col_size; col++) { + float chan_val_0 = 0.0; + float chan_val_1 = 0.0; + float chan_val_2 = 0.0; + for (int cp = 0; cp < 3702; cp++) { + int index_0 = (0 * row_size + row) * col_size + col; + int index_1 = (1 * row_size + row) * col_size + col; + int index_2 = (2 * row_size + row) * col_size + col; + float val1 = (input[index_0] - ctrl_pts[cp * 3 + 0]); + float val2 = (input[index_0] - ctrl_pts[cp * 3 + 0]); + float val3 = (input[index_1] - ctrl_pts[cp * 3 + 1]); + float val4 = (input[index_1] - ctrl_pts[cp * 3 + 1]); + float val5 = (input[index_2] - ctrl_pts[cp * 3 + 2]); + float val6 = (input[index_2] - ctrl_pts[cp * 3 + 2]); + float val = val1 * val2 + val3 * val4 + val5 * val6; + float sqrt_val = sqrt(val); + chan_val_0 += sqrt_val * weights[cp * CHAN_SIZE + 0]; + chan_val_1 += sqrt_val * weights[cp * CHAN_SIZE + 1]; + chan_val_2 += sqrt_val * weights[cp * CHAN_SIZE + 2]; } - __visc__return(1, bytes_result); + chan_val_0 += + coefs[0 * CHAN_SIZE + 0] + + coefs[1 * CHAN_SIZE + 0] * + input[(0 * row_size + row) * col_size + col] + + coefs[2 * CHAN_SIZE + 0] * + input[(1 * row_size + row) * col_size + col] + + coefs[3 * CHAN_SIZE + 0] * input[(2 * row_size + row) * col_size + col]; + chan_val_1 += + coefs[0 * CHAN_SIZE + 1] + + coefs[1 * CHAN_SIZE + 1] * + input[(0 * row_size + row) * col_size + col] + + coefs[2 * CHAN_SIZE + 1] * + input[(1 * row_size + row) * col_size + col] + + coefs[3 * CHAN_SIZE + 1] * input[(2 * row_size + row) * col_size + col]; + chan_val_2 += + coefs[0 * 
CHAN_SIZE + 2] + + coefs[1 * CHAN_SIZE + 2] * + input[(0 * row_size + row) * col_size + col] + + coefs[2 * CHAN_SIZE + 2] * + input[(1 * row_size + row) * col_size + col] + + coefs[3 * CHAN_SIZE + 2] * input[(2 * row_size + row) * col_size + col]; + result[(0 * row_size + row) * col_size + col] = max(chan_val_0, 0); + result[(1 * row_size + row) * col_size + col] = max(chan_val_1, 0); + result[(2 * row_size + row) * col_size + col] = max(chan_val_2, 0); + } + __hpvm__return(1, bytes_result); } // HPVM leaf node function, for tone mapping -void tone_map_fxp(float *input, size_t bytes_input, - float *result, size_t bytes_result, - float *tone_map, size_t bytes_tone_map, +void tone_map_fxp(float *input, size_t bytes_input, float *result, + size_t bytes_result, float *tone_map, size_t bytes_tone_map, size_t row_size, size_t col_size) { - __visc__hint(DEVICE); - __visc__attributes(3, input, result, tone_map, 1, result); - - void* thisNode = __visc__getNode(); - int row = __visc__getNodeInstanceID_x(thisNode); + __hpvm__hint(DEVICE); + __hpvm__attributes(3, input, result, tone_map, 1, result); + + void *thisNode = __hpvm__getNode(); + int row = __hpvm__getNodeInstanceID_x(thisNode); for (int chan = 0; chan < CHAN_SIZE; chan++) -// for (int row = 0; row < row_size; row++) - for (int col = 0; col < col_size; col++) { - int index = (chan * row_size + row) * col_size + col; - uint8_t x = input[index] * 255; - result[index] = tone_map[x * CHAN_SIZE + chan]; - } - __visc__return(1, bytes_result); + // for (int row = 0; row < row_size; row++) + for (int col = 0; col < col_size; col++) { + int index = (chan * row_size + row) * col_size + col; + uint8_t x = input[index] * 255; + result[index] = tone_map[x * CHAN_SIZE + chan]; + } + __hpvm__return(1, bytes_result); } /********************************************************************/ @@ -400,185 +415,184 @@ void tone_map_fxp(float *input, size_t bytes_input, // requirement for the FPGA backend . 
The CPU backend also supports this, // so it does not cause a portability issue. -void scale_fxp_wrapper(uint8_t *input, size_t bytes_input, - float *result, size_t bytes_result, - size_t row_size, size_t col_size) { - __visc__hint(CPU_TARGET); - __visc__attributes(2, input, result, 1, result); +void scale_fxp_wrapper(uint8_t *input, size_t bytes_input, float *result, + size_t bytes_result, size_t row_size, size_t col_size) { + __hpvm__hint(CPU_TARGET); + __hpvm__attributes(2, input, result, 1, result); // Create an 1D (specified by 1st argument) HPVM node with 1 dynamic // instance (last argument) associated with node function scale_fxp - void *ScaleNode = __visc__createNodeND(1, scale_fxp, row_size); + void *ScaleNode = __hpvm__createNodeND(1, scale_fxp, row_size); // Binds inputs of current node with specified node // - destination node // - argument position in argument list of function of source node // - argument position in argument list of function of destination node // - streaming (1) or non-streaming (0) - __visc__bindIn(ScaleNode, 0, 0, 0); // bind input - __visc__bindIn(ScaleNode, 1, 1, 0); // bind bytes_input - __visc__bindIn(ScaleNode, 2, 2, 0); // bind result - __visc__bindIn(ScaleNode, 3, 3, 0); // bind bytes_result - __visc__bindIn(ScaleNode, 4, 4, 0); // bind row_size - __visc__bindIn(ScaleNode, 5, 5, 0); // bind col_size + __hpvm__bindIn(ScaleNode, 0, 0, 0); // bind input + __hpvm__bindIn(ScaleNode, 1, 1, 0); // bind bytes_input + __hpvm__bindIn(ScaleNode, 2, 2, 0); // bind result + __hpvm__bindIn(ScaleNode, 3, 3, 0); // bind bytes_result + __hpvm__bindIn(ScaleNode, 4, 4, 0); // bind row_size + __hpvm__bindIn(ScaleNode, 5, 5, 0); // bind col_size // Similar to bindIn, but for the output. Output of a node is a struct, and // we consider the fields in increasing ordering. 
- __visc__bindOut(ScaleNode, 0, 0, 0); + __hpvm__bindOut(ScaleNode, 0, 0, 0); } -void descale_fxp_wrapper(float *input, size_t bytes_input, - uint8_t *result, size_t bytes_result, - size_t row_size, size_t col_size) { - __visc__hint(CPU_TARGET); - __visc__attributes(2, input, result, 1, result); - void *DescaleNode = __visc__createNodeND(1, descale_fxp, row_size); - __visc__bindIn(DescaleNode, 0, 0, 0); // bind input - __visc__bindIn(DescaleNode, 1, 1, 0); // bind bytes_input - __visc__bindIn(DescaleNode, 2, 2, 0); // bind result - __visc__bindIn(DescaleNode, 3, 3, 0); // bind bytes_result - __visc__bindIn(DescaleNode, 4, 4, 0); // bind row_size - __visc__bindIn(DescaleNode, 5, 5, 0); // bind col_size - - __visc__bindOut(DescaleNode, 0, 0, 0); +void descale_fxp_wrapper(float *input, size_t bytes_input, uint8_t *result, + size_t bytes_result, size_t row_size, + size_t col_size) { + __hpvm__hint(CPU_TARGET); + __hpvm__attributes(2, input, result, 1, result); + void *DescaleNode = __hpvm__createNodeND(1, descale_fxp, row_size); + __hpvm__bindIn(DescaleNode, 0, 0, 0); // bind input + __hpvm__bindIn(DescaleNode, 1, 1, 0); // bind bytes_input + __hpvm__bindIn(DescaleNode, 2, 2, 0); // bind result + __hpvm__bindIn(DescaleNode, 3, 3, 0); // bind bytes_result + __hpvm__bindIn(DescaleNode, 4, 4, 0); // bind row_size + __hpvm__bindIn(DescaleNode, 5, 5, 0); // bind col_size + + __hpvm__bindOut(DescaleNode, 0, 0, 0); } -void demosaic_fxp_wrapper(float *input, size_t bytes_input, - float *result, size_t bytes_result, - size_t row_size, size_t col_size) { - __visc__hint(CPU_TARGET); - __visc__attributes(2, input, result, 1, result); - void *DemosaicNode = __visc__createNodeND(1, demosaic_fxp, row_size); - __visc__bindIn(DemosaicNode, 0, 0, 0); // bind input - __visc__bindIn(DemosaicNode, 1, 1, 0); // bind bytes_input - __visc__bindIn(DemosaicNode, 2, 2, 0); // bind result - __visc__bindIn(DemosaicNode, 3, 3, 0); // bind bytes_result - __visc__bindIn(DemosaicNode, 4, 4, 0); // 
bind row_size - __visc__bindIn(DemosaicNode, 5, 5, 0); // bind col_size - - __visc__bindOut(DemosaicNode, 0, 0, 0); +void demosaic_fxp_wrapper(float *input, size_t bytes_input, float *result, + size_t bytes_result, size_t row_size, + size_t col_size) { + __hpvm__hint(CPU_TARGET); + __hpvm__attributes(2, input, result, 1, result); + void *DemosaicNode = __hpvm__createNodeND(1, demosaic_fxp, row_size); + __hpvm__bindIn(DemosaicNode, 0, 0, 0); // bind input + __hpvm__bindIn(DemosaicNode, 1, 1, 0); // bind bytes_input + __hpvm__bindIn(DemosaicNode, 2, 2, 0); // bind result + __hpvm__bindIn(DemosaicNode, 3, 3, 0); // bind bytes_result + __hpvm__bindIn(DemosaicNode, 4, 4, 0); // bind row_size + __hpvm__bindIn(DemosaicNode, 5, 5, 0); // bind col_size + + __hpvm__bindOut(DemosaicNode, 0, 0, 0); } -void denoise_fxp_wrapper(float *input, size_t bytes_input, - float *result, size_t bytes_result, - size_t row_size, size_t col_size) { - __visc__hint(CPU_TARGET); - __visc__attributes(2, input, result, 1, result); - void *DenoiseNode = __visc__createNodeND(1, denoise_fxp, row_size); - __visc__bindIn(DenoiseNode, 0, 0, 0); // bind input - __visc__bindIn(DenoiseNode, 1, 1, 0); // bind bytes_input - __visc__bindIn(DenoiseNode, 2, 2, 0); // bind result - __visc__bindIn(DenoiseNode, 3, 3, 0); // bind bytes_result - __visc__bindIn(DenoiseNode, 4, 4, 0); // bind row_size - __visc__bindIn(DenoiseNode, 5, 5, 0); // bind col_size - - __visc__bindOut(DenoiseNode, 0, 0, 0); +void denoise_fxp_wrapper(float *input, size_t bytes_input, float *result, + size_t bytes_result, size_t row_size, + size_t col_size) { + __hpvm__hint(CPU_TARGET); + __hpvm__attributes(2, input, result, 1, result); + void *DenoiseNode = __hpvm__createNodeND(1, denoise_fxp, row_size); + __hpvm__bindIn(DenoiseNode, 0, 0, 0); // bind input + __hpvm__bindIn(DenoiseNode, 1, 1, 0); // bind bytes_input + __hpvm__bindIn(DenoiseNode, 2, 2, 0); // bind result + __hpvm__bindIn(DenoiseNode, 3, 3, 0); // bind bytes_result + 
__hpvm__bindIn(DenoiseNode, 4, 4, 0); // bind row_size + __hpvm__bindIn(DenoiseNode, 5, 5, 0); // bind col_size + + __hpvm__bindOut(DenoiseNode, 0, 0, 0); } -void transform_fxp_wrapper(float *input, size_t bytes_input, - float *result, size_t bytes_result, - float *TsTw_tran, size_t bytes_TsTw, - size_t row_size, size_t col_size) { - __visc__hint(CPU_TARGET); - __visc__attributes(3, input, result, TsTw_tran, 1, result); - void *TransformNode = __visc__createNodeND(1, transform_fxp, row_size); - __visc__bindIn(TransformNode, 0, 0, 0); // bind input - __visc__bindIn(TransformNode, 1, 1, 0); // bind bytes_input - __visc__bindIn(TransformNode, 2, 2, 0); // bind result - __visc__bindIn(TransformNode, 3, 3, 0); // bind bytes_result - __visc__bindIn(TransformNode, 4, 4, 0); // bind tstw - __visc__bindIn(TransformNode, 5, 5, 0); // bind bytes_tstw - __visc__bindIn(TransformNode, 6, 6, 0); // bind row_size - __visc__bindIn(TransformNode, 7, 7, 0); // bind col_size - - __visc__bindOut(TransformNode, 0, 0, 0); +void transform_fxp_wrapper(float *input, size_t bytes_input, float *result, + size_t bytes_result, float *TsTw_tran, + size_t bytes_TsTw, size_t row_size, + size_t col_size) { + __hpvm__hint(CPU_TARGET); + __hpvm__attributes(3, input, result, TsTw_tran, 1, result); + void *TransformNode = __hpvm__createNodeND(1, transform_fxp, row_size); + __hpvm__bindIn(TransformNode, 0, 0, 0); // bind input + __hpvm__bindIn(TransformNode, 1, 1, 0); // bind bytes_input + __hpvm__bindIn(TransformNode, 2, 2, 0); // bind result + __hpvm__bindIn(TransformNode, 3, 3, 0); // bind bytes_result + __hpvm__bindIn(TransformNode, 4, 4, 0); // bind tstw + __hpvm__bindIn(TransformNode, 5, 5, 0); // bind bytes_tstw + __hpvm__bindIn(TransformNode, 6, 6, 0); // bind row_size + __hpvm__bindIn(TransformNode, 7, 7, 0); // bind col_size + + __hpvm__bindOut(TransformNode, 0, 0, 0); } -void gamut_fxp_wrapper(float *input, size_t bytes_input, - float *result, size_t bytes_result, - float *ctrl_pts, size_t 
bytes_ctrl_pts, - float *weights, size_t bytes_weights, - float *coefs, size_t bytes_coefs, - float *l2_dist, size_t bytes_l2_dist, - size_t row_size, size_t col_size) { - __visc__hint(CPU_TARGET); - __visc__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 1, result); - void *GamutNode = __visc__createNodeND(1, gamut_map_fxp, row_size); - __visc__bindIn(GamutNode, 0, 0, 0); // bind input - __visc__bindIn(GamutNode, 1, 1, 0); // bind bytes_input - __visc__bindIn(GamutNode, 2, 2, 0); // bind result - __visc__bindIn(GamutNode, 3, 3, 0); // bind bytes_result - __visc__bindIn(GamutNode, 4, 4, 0); // bind ctrl_pts - __visc__bindIn(GamutNode, 5, 5, 0); // bind bytes_ctrl_pts - __visc__bindIn(GamutNode, 6, 6, 0); // bind weights - __visc__bindIn(GamutNode, 7, 7, 0); // bind bytes_weights - __visc__bindIn(GamutNode, 8, 8, 0); // bind coefs - __visc__bindIn(GamutNode, 9, 9, 0); // bind bytes_coefs - __visc__bindIn(GamutNode, 10, 10, 0); // bind l2_dist - __visc__bindIn(GamutNode, 11, 11, 0); // bind bytes_l2_dist - __visc__bindIn(GamutNode, 12, 12, 0); // bind row_size - __visc__bindIn(GamutNode, 13, 13, 0); // bind col_size - - __visc__bindOut(GamutNode, 0, 0, 0); +void gamut_fxp_wrapper(float *input, size_t bytes_input, float *result, + size_t bytes_result, float *ctrl_pts, + size_t bytes_ctrl_pts, float *weights, + size_t bytes_weights, float *coefs, size_t bytes_coefs, + float *l2_dist, size_t bytes_l2_dist, size_t row_size, + size_t col_size) { + __hpvm__hint(CPU_TARGET); + __hpvm__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 1, + result); + void *GamutNode = __hpvm__createNodeND(1, gamut_map_fxp, row_size); + __hpvm__bindIn(GamutNode, 0, 0, 0); // bind input + __hpvm__bindIn(GamutNode, 1, 1, 0); // bind bytes_input + __hpvm__bindIn(GamutNode, 2, 2, 0); // bind result + __hpvm__bindIn(GamutNode, 3, 3, 0); // bind bytes_result + __hpvm__bindIn(GamutNode, 4, 4, 0); // bind ctrl_pts + __hpvm__bindIn(GamutNode, 5, 5, 0); // bind 
bytes_ctrl_pts + __hpvm__bindIn(GamutNode, 6, 6, 0); // bind weights + __hpvm__bindIn(GamutNode, 7, 7, 0); // bind bytes_weights + __hpvm__bindIn(GamutNode, 8, 8, 0); // bind coefs + __hpvm__bindIn(GamutNode, 9, 9, 0); // bind bytes_coefs + __hpvm__bindIn(GamutNode, 10, 10, 0); // bind l2_dist + __hpvm__bindIn(GamutNode, 11, 11, 0); // bind bytes_l2_dist + __hpvm__bindIn(GamutNode, 12, 12, 0); // bind row_size + __hpvm__bindIn(GamutNode, 13, 13, 0); // bind col_size + + __hpvm__bindOut(GamutNode, 0, 0, 0); } -void tone_map_fxp_wrapper(float *input, size_t bytes_input, - float *result, size_t bytes_result, - float *tone_map, size_t bytes_tone_map, - size_t row_size, size_t col_size) { - - __visc__hint(CPU_TARGET); - __visc__attributes(3, input, result, tone_map, 1, result); - void *ToneMapNode = __visc__createNodeND(1, tone_map_fxp, row_size); - __visc__bindIn(ToneMapNode, 0, 0, 0); // bind input - __visc__bindIn(ToneMapNode, 1, 1, 0); // bind bytes_input - __visc__bindIn(ToneMapNode, 2, 2, 0); // bind result - __visc__bindIn(ToneMapNode, 3, 3, 0); // bind bytes_result - __visc__bindIn(ToneMapNode, 4, 4, 0); // bind tone_map - __visc__bindIn(ToneMapNode, 5, 5, 0); // bind bytes_tone_map - __visc__bindIn(ToneMapNode, 6, 6, 0); // bind row_size - __visc__bindIn(ToneMapNode, 7, 7, 0); // bind col_size - - __visc__bindOut(ToneMapNode, 0, 0, 0); +void tone_map_fxp_wrapper(float *input, size_t bytes_input, float *result, + size_t bytes_result, float *tone_map, + size_t bytes_tone_map, size_t row_size, + size_t col_size) { + + __hpvm__hint(CPU_TARGET); + __hpvm__attributes(3, input, result, tone_map, 1, result); + void *ToneMapNode = __hpvm__createNodeND(1, tone_map_fxp, row_size); + __hpvm__bindIn(ToneMapNode, 0, 0, 0); // bind input + __hpvm__bindIn(ToneMapNode, 1, 1, 0); // bind bytes_input + __hpvm__bindIn(ToneMapNode, 2, 2, 0); // bind result + __hpvm__bindIn(ToneMapNode, 3, 3, 0); // bind bytes_result + __hpvm__bindIn(ToneMapNode, 4, 4, 0); // bind tone_map + 
__hpvm__bindIn(ToneMapNode, 5, 5, 0); // bind bytes_tone_map + __hpvm__bindIn(ToneMapNode, 6, 6, 0); // bind row_size + __hpvm__bindIn(ToneMapNode, 7, 7, 0); // bind col_size + + __hpvm__bindOut(ToneMapNode, 0, 0, 0); } - /*** ROOT Node - Top Level of the Graph Hierarchy ***/ -void CamPipeRoot(/*0*/ uint8_t *input, /*1*/ size_t bytes_input, - /*2*/ uint8_t *result, /*3*/ size_t bytes_result, - /*4*/ float *input_scaled, /*5*/ size_t bytes_input_scaled, - /*6*/ float *result_scaled, /*7*/ size_t bytes_result_scaled, - /*8*/ float *demosaic_out, /*9*/ size_t bytes_demosaic_out, - /*10*/ float *denoise_out, /*11*/ size_t bytes_denoise_out, - /*12*/ float *transform_out, /*13*/ size_t bytes_transform_out, - /*14*/ float *gamut_out, /*15*/ size_t bytes_gamut_out, - /*16*/ float *TsTw, /*17*/ size_t bytes_TsTw, - /*18*/ float *ctrl_pts, /*19*/ size_t bytes_ctrl_pts, - /*20*/ float *weights, /*21*/ size_t bytes_weights, - /*22*/ float*coefs, /*23*/ size_t bytes_coefs, - /*24*/ float *l2_dist, /*25*/ size_t bytes_l2_dist, - /*26*/ float *tone_map, /*27*/ size_t bytes_tone_map, - /*28*/ size_t row_size, /*29*/ size_t col_size) { - - //Specifies compilation target for current node - __visc__hint(CPU_TARGET); +void CamPipeRoot(/*0*/ uint8_t *input, /*1*/ size_t bytes_input, + /*2*/ uint8_t *result, /*3*/ size_t bytes_result, + /*4*/ float *input_scaled, /*5*/ size_t bytes_input_scaled, + /*6*/ float *result_scaled, /*7*/ size_t bytes_result_scaled, + /*8*/ float *demosaic_out, /*9*/ size_t bytes_demosaic_out, + /*10*/ float *denoise_out, /*11*/ size_t bytes_denoise_out, + /*12*/ float *transform_out, /*13*/ size_t bytes_transform_out, + /*14*/ float *gamut_out, /*15*/ size_t bytes_gamut_out, + /*16*/ float *TsTw, /*17*/ size_t bytes_TsTw, + /*18*/ float *ctrl_pts, /*19*/ size_t bytes_ctrl_pts, + /*20*/ float *weights, /*21*/ size_t bytes_weights, + /*22*/ float *coefs, /*23*/ size_t bytes_coefs, + /*24*/ float *l2_dist, /*25*/ size_t bytes_l2_dist, + /*26*/ float *tone_map, 
/*27*/ size_t bytes_tone_map, + /*28*/ size_t row_size, /*29*/ size_t col_size) { + + // Specifies compilation target for current node + __hpvm__hint(CPU_TARGET); // Specifies pointer arguments that will be used as "in" and "out" arguments // - count of "in" arguments // - list of "in" argument , and similar for "out" - __visc__attributes(14, input, result, input_scaled, result_scaled, demosaic_out, denoise_out, - transform_out, gamut_out, TsTw, ctrl_pts, weights, coefs, tone_map, l2_dist, - 5, result, demosaic_out, denoise_out, transform_out, gamut_out); + __hpvm__attributes(14, input, result, input_scaled, result_scaled, + demosaic_out, denoise_out, transform_out, gamut_out, TsTw, + ctrl_pts, weights, coefs, tone_map, l2_dist, 5, result, + demosaic_out, denoise_out, transform_out, gamut_out); // Create an 0D (specified by 1st argument) HPVM node - so a single node // associated with node function ---_fxp_wrapper - void* ScNode = __visc__createNodeND(0, scale_fxp_wrapper); - void* DmNode = __visc__createNodeND(0, demosaic_fxp_wrapper); - void *DnNode = __visc__createNodeND(0, denoise_fxp_wrapper); - void *TrNode = __visc__createNodeND(0, transform_fxp_wrapper); - void *GmNode = __visc__createNodeND(0, gamut_fxp_wrapper); - void *TnNode = __visc__createNodeND(0, tone_map_fxp_wrapper); - void *DsNode = __visc__createNodeND(0, descale_fxp_wrapper); - + void *ScNode = __hpvm__createNodeND(0, scale_fxp_wrapper); + void *DmNode = __hpvm__createNodeND(0, demosaic_fxp_wrapper); + void *DnNode = __hpvm__createNodeND(0, denoise_fxp_wrapper); + void *TrNode = __hpvm__createNodeND(0, transform_fxp_wrapper); + void *GmNode = __hpvm__createNodeND(0, gamut_fxp_wrapper); + void *TnNode = __hpvm__createNodeND(0, tone_map_fxp_wrapper); + void *DsNode = __hpvm__createNodeND(0, descale_fxp_wrapper); + // BindIn binds inputs of current node with specified node // - destination node // - argument position in argument list of function of source node @@ -592,268 +606,281 @@ void 
CamPipeRoot(/*0*/ uint8_t *input, /*1*/ size_t bytes_input, // - destination position (in argument list of destination node) // - streaming (1) or non-streaming (0) - // scale_fxp inputs - __visc__bindIn(ScNode, 0, 0, 0); // input -> ScNode:input - __visc__bindIn(ScNode, 1, 1, 0); // bytes_input -> ScNode:bytes_input - __visc__bindIn(ScNode, 4, 2, 0); // input_scaled -> ScNode:result - __visc__bindIn(ScNode, 5, 3, 0); // bytes_input_scaled -> ScNode:bytes_result - __visc__bindIn(ScNode, 28, 4, 0); // row_size -> ScNode:row_size - __visc__bindIn(ScNode, 29, 5, 0); // col_size -> ScNode:col_size - - // demosaic_fxp inputs - __visc__bindIn(DmNode, 4, 0, 0); // input_scaled -> DmNode:input - __visc__edge(ScNode, DmNode, 1, 0, 1, 0); // SCNode:bytes_result -> DmNode:bytes_input - __visc__bindIn(DmNode, 8, 2, 0); // demosaic_out -> DmNode:result - __visc__bindIn(DmNode, 9, 3, 0); // bytes_demosaic_out -> DmNode:bytes_result - __visc__bindIn(DmNode, 28, 4, 0); // row_size -> DmNode:row_size - __visc__bindIn(DmNode, 29, 5, 0); // col_size -> DmNode:col_size - - // denoise_fxp inputs - __visc__bindIn(DnNode, 8, 0, 0); // demosaic_out -> DnNode:input - __visc__edge(DmNode, DnNode, 1, 0, 1, 0); // DMNode:bytes_result -> DnNode:bytes_input - __visc__bindIn(DnNode, 10, 2, 0); // denoise_out -> DnNode:result - __visc__bindIn(DnNode, 11, 3, 0); // bytes_denoise_out -> DnNode:bytes_result - __visc__bindIn(DnNode, 28, 4, 0); // row_size -> DnNode:row_size - __visc__bindIn(DnNode, 29, 5, 0); // col_size -> DnNode:col_size - - // transform_fxp inputs - __visc__bindIn(TrNode, 10, 0, 0); // denoise_out -> TrNode:input - __visc__edge(DnNode, TrNode, 1, 0, 1, 0); // DnNode:bytes_result -> TrNode:bytes_input - __visc__bindIn(TrNode, 12, 2, 0); // transform_out -> TrNode:result - __visc__bindIn(TrNode, 13, 3, 0); // bytes_result_scaled -> TrNode:bytes_result - __visc__bindIn(TrNode, 16, 4, 0); // TsTw -> TrNode:TsTw_trann - __visc__bindIn(TrNode, 17, 5, 0); // bytes_TsTw -> 
TrNode:bytes_TsTw - __visc__bindIn(TrNode, 28, 6, 0); // row_size -> TrNode:row_size - __visc__bindIn(TrNode, 29, 7, 0); // col_size -> TrNode:col_size - - // gamut_fxp inputs - __visc__bindIn(GmNode, 12, 0, 0); // transform_out -> GmNode:input - __visc__edge(TrNode, GmNode, 1, 0, 1, 0); // TrNode:bytes_result -> GmNode:bytes_input - __visc__bindIn(GmNode, 14, 2, 0); // gamut_out -> GmNode:result - __visc__bindIn(GmNode, 15, 3, 0); // bytes_gamut_out -> GmNode:bytes_result - __visc__bindIn(GmNode, 18, 4, 0); // ctrl_pts -> GmNode:ctrl_pts - __visc__bindIn(GmNode, 19, 5, 0); // bytes_ctrl_pts -> GmNode:bytes_ctrl_pts - __visc__bindIn(GmNode, 20, 6, 0); // weights -> GmNode:weights - __visc__bindIn(GmNode, 21, 7, 0); // bytes_weights -> GmNode:bytes_weights - __visc__bindIn(GmNode, 22, 8, 0); // coefs -> GmNode:coefs - __visc__bindIn(GmNode, 23, 9, 0); // bytes_coefs -> GmNode:bytes_coefs - __visc__bindIn(GmNode, 24, 10, 0); // l2_dist -> GmNode: l2_dist - __visc__bindIn(GmNode, 25, 11, 0); // bytes_l2_dist -> GmNode:bytes_l2_dist - __visc__bindIn(GmNode, 28, 12, 0); // row_size -> GmNode:row_size - __visc__bindIn(GmNode, 29, 13, 0); // col_size -> GmNode:col_size - - // tone_map_fxp inputs - __visc__bindIn(TnNode, 14, 0, 0); // gamut_out -> TnNode:input - __visc__edge(GmNode, TnNode, 1, 0, 1, 0); // GmNode:bytes_result -> TnNode:bytes_input - __visc__bindIn(TnNode, 6, 2, 0); // result_scaled -> TnNode:result - __visc__bindIn(TnNode, 7, 3, 0); // bytes_result_scaled -> TnNode:bytes_result - __visc__bindIn(TnNode, 26, 4, 0); // tone_map -> TnNode:tone_map - __visc__bindIn(TnNode, 27, 5, 0); // bytes_tone_map -> TnNode:bytes_tone_map - __visc__bindIn(TnNode, 28, 6, 0); // row_size -> TnNode:row_size - __visc__bindIn(TnNode, 29, 7, 0); // col_size -> TnNode:col_size - - // descale_fxp inputs - __visc__bindIn(DsNode, 6, 0, 0); // result_scaled -> DsNode:input - __visc__edge(TnNode, DsNode, 1, 0, 1, 0); // TnNode:bytes_result -> DsNode:bytes_input - __visc__bindIn(DsNode, 
2, 2, 0); // result -> DsNode:result - __visc__bindIn(DsNode, 3, 3, 0); // bytes_result -> DsNode:bytes_result - __visc__bindIn(DsNode, 28, 4, 0); // row_size -> DsNode:row_size - __visc__bindIn(DsNode, 29, 5, 0); // col_size -> DsNode:col_size + // scale_fxp inputs + __hpvm__bindIn(ScNode, 0, 0, 0); // input -> ScNode:input + __hpvm__bindIn(ScNode, 1, 1, 0); // bytes_input -> ScNode:bytes_input + __hpvm__bindIn(ScNode, 4, 2, 0); // input_scaled -> ScNode:result + __hpvm__bindIn(ScNode, 5, 3, 0); // bytes_input_scaled -> ScNode:bytes_result + __hpvm__bindIn(ScNode, 28, 4, 0); // row_size -> ScNode:row_size + __hpvm__bindIn(ScNode, 29, 5, 0); // col_size -> ScNode:col_size + + // demosaic_fxp inputs + __hpvm__bindIn(DmNode, 4, 0, 0); // input_scaled -> DmNode:input + __hpvm__edge(ScNode, DmNode, 1, 0, 1, + 0); // SCNode:bytes_result -> DmNode:bytes_input + __hpvm__bindIn(DmNode, 8, 2, 0); // demosaic_out -> DmNode:result + __hpvm__bindIn(DmNode, 9, 3, 0); // bytes_demosaic_out -> DmNode:bytes_result + __hpvm__bindIn(DmNode, 28, 4, 0); // row_size -> DmNode:row_size + __hpvm__bindIn(DmNode, 29, 5, 0); // col_size -> DmNode:col_size + + // denoise_fxp inputs + __hpvm__bindIn(DnNode, 8, 0, 0); // demosaic_out -> DnNode:input + __hpvm__edge(DmNode, DnNode, 1, 0, 1, + 0); // DMNode:bytes_result -> DnNode:bytes_input + __hpvm__bindIn(DnNode, 10, 2, 0); // denoise_out -> DnNode:result + __hpvm__bindIn(DnNode, 11, 3, 0); // bytes_denoise_out -> DnNode:bytes_result + __hpvm__bindIn(DnNode, 28, 4, 0); // row_size -> DnNode:row_size + __hpvm__bindIn(DnNode, 29, 5, 0); // col_size -> DnNode:col_size + + // transform_fxp inputs + __hpvm__bindIn(TrNode, 10, 0, 0); // denoise_out -> TrNode:input + __hpvm__edge(DnNode, TrNode, 1, 0, 1, + 0); // DnNode:bytes_result -> TrNode:bytes_input + __hpvm__bindIn(TrNode, 12, 2, 0); // transform_out -> TrNode:result + __hpvm__bindIn(TrNode, 13, 3, + 0); // bytes_result_scaled -> TrNode:bytes_result + __hpvm__bindIn(TrNode, 16, 4, 0); // TsTw 
-> TrNode:TsTw_trann + __hpvm__bindIn(TrNode, 17, 5, 0); // bytes_TsTw -> TrNode:bytes_TsTw + __hpvm__bindIn(TrNode, 28, 6, 0); // row_size -> TrNode:row_size + __hpvm__bindIn(TrNode, 29, 7, 0); // col_size -> TrNode:col_size + + // gamut_fxp inputs + __hpvm__bindIn(GmNode, 12, 0, 0); // transform_out -> GmNode:input + __hpvm__edge(TrNode, GmNode, 1, 0, 1, + 0); // TrNode:bytes_result -> GmNode:bytes_input + __hpvm__bindIn(GmNode, 14, 2, 0); // gamut_out -> GmNode:result + __hpvm__bindIn(GmNode, 15, 3, 0); // bytes_gamut_out -> GmNode:bytes_result + __hpvm__bindIn(GmNode, 18, 4, 0); // ctrl_pts -> GmNode:ctrl_pts + __hpvm__bindIn(GmNode, 19, 5, 0); // bytes_ctrl_pts -> GmNode:bytes_ctrl_pts + __hpvm__bindIn(GmNode, 20, 6, 0); // weights -> GmNode:weights + __hpvm__bindIn(GmNode, 21, 7, 0); // bytes_weights -> GmNode:bytes_weights + __hpvm__bindIn(GmNode, 22, 8, 0); // coefs -> GmNode:coefs + __hpvm__bindIn(GmNode, 23, 9, 0); // bytes_coefs -> GmNode:bytes_coefs + __hpvm__bindIn(GmNode, 24, 10, 0); // l2_dist -> GmNode: l2_dist + __hpvm__bindIn(GmNode, 25, 11, 0); // bytes_l2_dist -> GmNode:bytes_l2_dist + __hpvm__bindIn(GmNode, 28, 12, 0); // row_size -> GmNode:row_size + __hpvm__bindIn(GmNode, 29, 13, 0); // col_size -> GmNode:col_size + + // tone_map_fxp inputs + __hpvm__bindIn(TnNode, 14, 0, 0); // gamut_out -> TnNode:input + __hpvm__edge(GmNode, TnNode, 1, 0, 1, + 0); // GmNode:bytes_result -> TnNode:bytes_input + __hpvm__bindIn(TnNode, 6, 2, 0); // result_scaled -> TnNode:result + __hpvm__bindIn(TnNode, 7, 3, 0); // bytes_result_scaled -> TnNode:bytes_result + __hpvm__bindIn(TnNode, 26, 4, 0); // tone_map -> TnNode:tone_map + __hpvm__bindIn(TnNode, 27, 5, 0); // bytes_tone_map -> TnNode:bytes_tone_map + __hpvm__bindIn(TnNode, 28, 6, 0); // row_size -> TnNode:row_size + __hpvm__bindIn(TnNode, 29, 7, 0); // col_size -> TnNode:col_size + + // descale_fxp inputs + __hpvm__bindIn(DsNode, 6, 0, 0); // result_scaled -> DsNode:input + __hpvm__edge(TnNode, DsNode, 1, 
0, 1, + 0); // TnNode:bytes_result -> DsNode:bytes_input + __hpvm__bindIn(DsNode, 2, 2, 0); // result -> DsNode:result + __hpvm__bindIn(DsNode, 3, 3, 0); // bytes_result -> DsNode:bytes_result + __hpvm__bindIn(DsNode, 28, 4, 0); // row_size -> DsNode:row_size + __hpvm__bindIn(DsNode, 29, 5, 0); // col_size -> DsNode:col_size // Similar to bindIn, but for the output. Output of a node is a struct, and // we consider the fields in increasing ordering. - __visc__bindOut(DsNode, 0, 0, 0); - + __hpvm__bindOut(DsNode, 0, 0, 0); } -int main(int argc, char* argv[]) { - // Parse the arguments. - arguments args; - set_default_args(&args); - argp_parse(&parser, argc, argv, 0, 0, &args); - - // Read a raw image. - // NOTE: We deliberately perform this file I/O outside of the kernel. - printf("Reading a raw image from %s\n", args.args[RAW_IMAGE_BIN]); - size_t row_size, col_size; - uint8_t *image_in = read_image_from_binary(args.args[RAW_IMAGE_BIN], &row_size, &col_size); - - printf("Raw image shape: %d x %d x %d\n", row_size, col_size, CHAN_SIZE); - - // Allocate a buffer for storing the output image data. - // (This is currently the same size as the input image data.) 
- size_t bytes_image = sizeof(uint8_t) * row_size * col_size * CHAN_SIZE; - size_t bytes_fimage = sizeof(float) * row_size * col_size * CHAN_SIZE; - uint8_t *image_out = (uint8_t*) malloc_aligned(bytes_image); - uint8_t *image_out_gamut = (uint8_t*) malloc_aligned(bytes_image); - uint8_t *image_out_demosaic = (uint8_t*) malloc_aligned(bytes_image); - uint8_t *image_out_denoise = (uint8_t*) malloc_aligned(bytes_image); - uint8_t *image_out_transform = (uint8_t*) malloc_aligned(bytes_image); - - __visc__init(); - - /////////////////////////////////////////////////////////////// - // Camera Model Parameters - /////////////////////////////////////////////////////////////// - // Path to the camera model to be used -// char cam_model_path[100]; -// char cam_model_path = "cam_models/NikonD7000/"; - // White balance index (select white balance from transform file) - // The first white balance in the file has a wb_index of 1 - // For more information on model format see the readme - int wb_index = 6; - - // Number of control points - int num_ctrl_pts = 3702; - uint8_t *input, *result; - float *input_scaled, *result_scaled, *demosaic_out, *denoise_out, *transform_out, *gamut_out; - float *TsTw, *ctrl_pts, *weights, *coefs, *tone_map, *l2_dist; - - TsTw = get_TsTw("cam_models/NikonD7000/", wb_index); - float *trans = transpose_mat(TsTw, CHAN_SIZE, CHAN_SIZE); - free(TsTw); - TsTw = trans; - ctrl_pts = get_ctrl_pts("cam_models/NikonD7000/", num_ctrl_pts); - weights = get_weights("cam_models/NikonD7000/", num_ctrl_pts); - coefs = get_coefs("cam_models/NikonD7000/", num_ctrl_pts); - tone_map = get_tone_map("cam_models/NikonD7000/"); - - input_scaled = (float*) malloc_aligned(bytes_fimage); - result_scaled = (float*) malloc_aligned(bytes_fimage); - demosaic_out = (float*) malloc_aligned(bytes_fimage); - denoise_out = (float*) malloc_aligned(bytes_fimage); - transform_out = (float*) malloc_aligned(bytes_fimage); - gamut_out = (float*) malloc_aligned(bytes_fimage); - l2_dist = 
(float*) malloc_aligned(sizeof(float) * num_ctrl_pts); - - // This is host_input in cam_pipe() - input = (uint8_t*) malloc_aligned(bytes_image); - convert_hwc_to_chw(image_in, row_size, col_size, &input); - - // This is host_result in cam_pipe() - result = (uint8_t*) malloc_aligned(bytes_image); - - // Allocate struct to pass DFG inputs - RootIn* rootArgs = (RootIn*) malloc(sizeof(RootIn)); - - // Set up HPVM DFG inputs in the rootArgs struct. - rootArgs->input = input; - rootArgs->bytes_input = bytes_image; - - rootArgs->result = result; - rootArgs->bytes_result = bytes_image; - - rootArgs->input_scaled = input_scaled; - rootArgs->bytes_input_scaled = bytes_fimage; - - rootArgs->result_scaled = result_scaled; - rootArgs->bytes_result_scaled = bytes_fimage; - - rootArgs->demosaic_out = demosaic_out; - rootArgs->bytes_demosaic_out = bytes_fimage; - - rootArgs->denoise_out = denoise_out; - rootArgs->bytes_denoise_out = bytes_fimage; - - rootArgs->transform_out = transform_out; - rootArgs->bytes_transform_out = bytes_fimage; - - rootArgs->gamut_out = gamut_out; - rootArgs->bytes_gamut_out = bytes_fimage; - - rootArgs->TsTw = TsTw; - rootArgs->bytes_TsTw = CHAN_SIZE * CHAN_SIZE * sizeof(float); - - rootArgs->ctrl_pts = ctrl_pts; - rootArgs->bytes_ctrl_pts = num_ctrl_pts * CHAN_SIZE * sizeof(float); - - rootArgs->weights = weights; - rootArgs->bytes_weights = num_ctrl_pts * CHAN_SIZE * sizeof(float); - - rootArgs->coefs = coefs; - rootArgs->bytes_coefs = 4 * CHAN_SIZE * sizeof(float); - - rootArgs->tone_map = tone_map; - rootArgs->bytes_tone_map = 256 * CHAN_SIZE * sizeof(float); - - rootArgs->l2_dist = l2_dist; - rootArgs->bytes_l2_dist = num_ctrl_pts * sizeof(float); - - rootArgs->row_size = row_size; - rootArgs->col_size = col_size; - - // Memory tracking is required for pointer arguments. - // Nodes can be scheduled on different targets, and - // dataflow edge implementation needs to request data. 
- // The pair (pointer, size) is inserted in memory tracker using this call - llvm_visc_track_mem(input, bytes_image); - llvm_visc_track_mem(result, bytes_image); - llvm_visc_track_mem(input_scaled, bytes_fimage); - llvm_visc_track_mem(result_scaled, bytes_fimage); - llvm_visc_track_mem(demosaic_out, bytes_fimage); - llvm_visc_track_mem(denoise_out, bytes_fimage); - llvm_visc_track_mem(transform_out, bytes_fimage); - llvm_visc_track_mem(gamut_out, bytes_fimage); - llvm_visc_track_mem(TsTw, CHAN_SIZE * CHAN_SIZE * sizeof(float)); - llvm_visc_track_mem(ctrl_pts, num_ctrl_pts * CHAN_SIZE * sizeof(float)); - llvm_visc_track_mem(weights, num_ctrl_pts * CHAN_SIZE * sizeof(float)); - llvm_visc_track_mem(coefs, 4 * CHAN_SIZE *sizeof(float)); - llvm_visc_track_mem(tone_map, 256 * CHAN_SIZE * sizeof(float)); - llvm_visc_track_mem(l2_dist, num_ctrl_pts * sizeof(float)); - - printf("\n\nLaunching CAVA pipeline!\n"); - - void* camPipeDFG = __visc__launch(0, CamPipeRoot, (void*) rootArgs); - __visc__wait(camPipeDFG); - - printf("\n\nPipeline execution completed!\n"); - printf("\n\nRequesting memory!\n"); - - // Request data from graph. 
- llvm_visc_request_mem(result, bytes_image); - llvm_visc_request_mem(demosaic_out, bytes_fimage); - llvm_visc_request_mem(denoise_out, bytes_fimage); - llvm_visc_request_mem(transform_out, bytes_fimage); - llvm_visc_request_mem(gamut_out, bytes_fimage); - printf("\n\nDone requesting memory!\n"); - - - uint8_t* gamut_out_descaled = (uint8_t*) malloc_aligned(bytes_image); - uint8_t* demosaic_out_descaled = (uint8_t*) malloc_aligned(bytes_image); - uint8_t* transform_out_descaled = (uint8_t*) malloc_aligned(bytes_image); - uint8_t* denoise_out_descaled = (uint8_t*) malloc_aligned(bytes_image); - - descale_cpu(demosaic_out, bytes_fimage, demosaic_out_descaled, bytes_image, row_size, col_size); - descale_cpu(gamut_out, bytes_fimage, gamut_out_descaled, bytes_image, row_size, col_size); - descale_cpu(denoise_out, bytes_fimage, denoise_out_descaled, bytes_image, row_size, col_size); - descale_cpu(transform_out, bytes_fimage, transform_out_descaled, bytes_image, row_size, col_size); - - convert_chw_to_hwc(result, row_size, col_size, &image_out); - convert_chw_to_hwc(gamut_out_descaled, row_size, col_size, &image_out_gamut); - convert_chw_to_hwc(demosaic_out_descaled, row_size, col_size, &image_out_demosaic); - convert_chw_to_hwc(denoise_out_descaled, row_size, col_size, &image_out_denoise); - convert_chw_to_hwc(transform_out_descaled, row_size, col_size, &image_out_transform); - - - // Remove tracked pointers. - llvm_visc_untrack_mem(input); - llvm_visc_untrack_mem(result); - llvm_visc_untrack_mem(input_scaled); - llvm_visc_untrack_mem(result_scaled); - llvm_visc_untrack_mem(demosaic_out); - llvm_visc_untrack_mem(denoise_out); - llvm_visc_untrack_mem(transform_out); - llvm_visc_untrack_mem(gamut_out); - - llvm_visc_untrack_mem(TsTw); - llvm_visc_untrack_mem(ctrl_pts); - llvm_visc_untrack_mem(weights); - llvm_visc_untrack_mem(coefs); - llvm_visc_untrack_mem(tone_map); - llvm_visc_untrack_mem(l2_dist); - - // Output the image. 
- // NOTE: We deliberately perform this file I/O outside of the kernel. +int main(int argc, char *argv[]) { + // Parse the arguments. + arguments args; + set_default_args(&args); + argp_parse(&parser, argc, argv, 0, 0, &args); + + // Read a raw image. + // NOTE: We deliberately perform this file I/O outside of the kernel. + printf("Reading a raw image from %s\n", args.args[RAW_IMAGE_BIN]); + size_t row_size, col_size; + uint8_t *image_in = + read_image_from_binary(args.args[RAW_IMAGE_BIN], &row_size, &col_size); + + printf("Raw image shape: %d x %d x %d\n", row_size, col_size, CHAN_SIZE); + + // Allocate a buffer for storing the output image data. + // (This is currently the same size as the input image data.) + size_t bytes_image = sizeof(uint8_t) * row_size * col_size * CHAN_SIZE; + size_t bytes_fimage = sizeof(float) * row_size * col_size * CHAN_SIZE; + uint8_t *image_out = (uint8_t *)malloc_aligned(bytes_image); + uint8_t *image_out_gamut = (uint8_t *)malloc_aligned(bytes_image); + uint8_t *image_out_demosaic = (uint8_t *)malloc_aligned(bytes_image); + uint8_t *image_out_denoise = (uint8_t *)malloc_aligned(bytes_image); + uint8_t *image_out_transform = (uint8_t *)malloc_aligned(bytes_image); + + __hpvm__init(); + + /////////////////////////////////////////////////////////////// + // Camera Model Parameters + /////////////////////////////////////////////////////////////// + // Path to the camera model to be used + // char cam_model_path[100]; + // char cam_model_path = "cam_models/NikonD7000/"; + // White balance index (select white balance from transform file) + // The first white balance in the file has a wb_index of 1 + // For more information on model format see the readme + int wb_index = 6; + + // Number of control points + int num_ctrl_pts = 3702; + uint8_t *input, *result; + float *input_scaled, *result_scaled, *demosaic_out, *denoise_out, + *transform_out, *gamut_out; + float *TsTw, *ctrl_pts, *weights, *coefs, *tone_map, *l2_dist; + + TsTw = 
get_TsTw("cam_models/NikonD7000/", wb_index); + float *trans = transpose_mat(TsTw, CHAN_SIZE, CHAN_SIZE); + free(TsTw); + TsTw = trans; + ctrl_pts = get_ctrl_pts("cam_models/NikonD7000/", num_ctrl_pts); + weights = get_weights("cam_models/NikonD7000/", num_ctrl_pts); + coefs = get_coefs("cam_models/NikonD7000/", num_ctrl_pts); + tone_map = get_tone_map("cam_models/NikonD7000/"); + + input_scaled = (float *)malloc_aligned(bytes_fimage); + result_scaled = (float *)malloc_aligned(bytes_fimage); + demosaic_out = (float *)malloc_aligned(bytes_fimage); + denoise_out = (float *)malloc_aligned(bytes_fimage); + transform_out = (float *)malloc_aligned(bytes_fimage); + gamut_out = (float *)malloc_aligned(bytes_fimage); + l2_dist = (float *)malloc_aligned(sizeof(float) * num_ctrl_pts); + + // This is host_input in cam_pipe() + input = (uint8_t *)malloc_aligned(bytes_image); + convert_hwc_to_chw(image_in, row_size, col_size, &input); + + // This is host_result in cam_pipe() + result = (uint8_t *)malloc_aligned(bytes_image); + + // Allocate struct to pass DFG inputs + RootIn *rootArgs = (RootIn *)malloc(sizeof(RootIn)); + + // Set up HPVM DFG inputs in the rootArgs struct. 
+ rootArgs->input = input; + rootArgs->bytes_input = bytes_image; + + rootArgs->result = result; + rootArgs->bytes_result = bytes_image; + + rootArgs->input_scaled = input_scaled; + rootArgs->bytes_input_scaled = bytes_fimage; + + rootArgs->result_scaled = result_scaled; + rootArgs->bytes_result_scaled = bytes_fimage; + + rootArgs->demosaic_out = demosaic_out; + rootArgs->bytes_demosaic_out = bytes_fimage; + + rootArgs->denoise_out = denoise_out; + rootArgs->bytes_denoise_out = bytes_fimage; + + rootArgs->transform_out = transform_out; + rootArgs->bytes_transform_out = bytes_fimage; + + rootArgs->gamut_out = gamut_out; + rootArgs->bytes_gamut_out = bytes_fimage; + + rootArgs->TsTw = TsTw; + rootArgs->bytes_TsTw = CHAN_SIZE * CHAN_SIZE * sizeof(float); + + rootArgs->ctrl_pts = ctrl_pts; + rootArgs->bytes_ctrl_pts = num_ctrl_pts * CHAN_SIZE * sizeof(float); + + rootArgs->weights = weights; + rootArgs->bytes_weights = num_ctrl_pts * CHAN_SIZE * sizeof(float); + + rootArgs->coefs = coefs; + rootArgs->bytes_coefs = 4 * CHAN_SIZE * sizeof(float); + + rootArgs->tone_map = tone_map; + rootArgs->bytes_tone_map = 256 * CHAN_SIZE * sizeof(float); + + rootArgs->l2_dist = l2_dist; + rootArgs->bytes_l2_dist = num_ctrl_pts * sizeof(float); + + rootArgs->row_size = row_size; + rootArgs->col_size = col_size; + + // Memory tracking is required for pointer arguments. + // Nodes can be scheduled on different targets, and + // dataflow edge implementation needs to request data. 
+ // The pair (pointer, size) is inserted in memory tracker using this call + llvm_hpvm_track_mem(input, bytes_image); + llvm_hpvm_track_mem(result, bytes_image); + llvm_hpvm_track_mem(input_scaled, bytes_fimage); + llvm_hpvm_track_mem(result_scaled, bytes_fimage); + llvm_hpvm_track_mem(demosaic_out, bytes_fimage); + llvm_hpvm_track_mem(denoise_out, bytes_fimage); + llvm_hpvm_track_mem(transform_out, bytes_fimage); + llvm_hpvm_track_mem(gamut_out, bytes_fimage); + llvm_hpvm_track_mem(TsTw, CHAN_SIZE * CHAN_SIZE * sizeof(float)); + llvm_hpvm_track_mem(ctrl_pts, num_ctrl_pts * CHAN_SIZE * sizeof(float)); + llvm_hpvm_track_mem(weights, num_ctrl_pts * CHAN_SIZE * sizeof(float)); + llvm_hpvm_track_mem(coefs, 4 * CHAN_SIZE * sizeof(float)); + llvm_hpvm_track_mem(tone_map, 256 * CHAN_SIZE * sizeof(float)); + llvm_hpvm_track_mem(l2_dist, num_ctrl_pts * sizeof(float)); + + printf("\n\nLaunching CAVA pipeline!\n"); + + void *camPipeDFG = __hpvm__launch(0, CamPipeRoot, (void *)rootArgs); + __hpvm__wait(camPipeDFG); + + printf("\n\nPipeline execution completed!\n"); + printf("\n\nRequesting memory!\n"); + + // Request data from graph. 
+ llvm_hpvm_request_mem(result, bytes_image); + llvm_hpvm_request_mem(demosaic_out, bytes_fimage); + llvm_hpvm_request_mem(denoise_out, bytes_fimage); + llvm_hpvm_request_mem(transform_out, bytes_fimage); + llvm_hpvm_request_mem(gamut_out, bytes_fimage); + printf("\n\nDone requesting memory!\n"); + + uint8_t *gamut_out_descaled = (uint8_t *)malloc_aligned(bytes_image); + uint8_t *demosaic_out_descaled = (uint8_t *)malloc_aligned(bytes_image); + uint8_t *transform_out_descaled = (uint8_t *)malloc_aligned(bytes_image); + uint8_t *denoise_out_descaled = (uint8_t *)malloc_aligned(bytes_image); + + descale_cpu(demosaic_out, bytes_fimage, demosaic_out_descaled, bytes_image, + row_size, col_size); + descale_cpu(gamut_out, bytes_fimage, gamut_out_descaled, bytes_image, + row_size, col_size); + descale_cpu(denoise_out, bytes_fimage, denoise_out_descaled, bytes_image, + row_size, col_size); + descale_cpu(transform_out, bytes_fimage, transform_out_descaled, bytes_image, + row_size, col_size); + + convert_chw_to_hwc(result, row_size, col_size, &image_out); + convert_chw_to_hwc(gamut_out_descaled, row_size, col_size, &image_out_gamut); + convert_chw_to_hwc(demosaic_out_descaled, row_size, col_size, + &image_out_demosaic); + convert_chw_to_hwc(denoise_out_descaled, row_size, col_size, + &image_out_denoise); + convert_chw_to_hwc(transform_out_descaled, row_size, col_size, + &image_out_transform); + + // Remove tracked pointers. + llvm_hpvm_untrack_mem(input); + llvm_hpvm_untrack_mem(result); + llvm_hpvm_untrack_mem(input_scaled); + llvm_hpvm_untrack_mem(result_scaled); + llvm_hpvm_untrack_mem(demosaic_out); + llvm_hpvm_untrack_mem(denoise_out); + llvm_hpvm_untrack_mem(transform_out); + llvm_hpvm_untrack_mem(gamut_out); + + llvm_hpvm_untrack_mem(TsTw); + llvm_hpvm_untrack_mem(ctrl_pts); + llvm_hpvm_untrack_mem(weights); + llvm_hpvm_untrack_mem(coefs); + llvm_hpvm_untrack_mem(tone_map); + llvm_hpvm_untrack_mem(l2_dist); + + // Output the image. 
+ // NOTE: We deliberately perform this file I/O outside of the kernel. char str[50], base_str[50]; strcpy(base_str, args.args[OUTPUT_IMAGE_BIN]); strcpy(str, base_str); @@ -877,8 +904,7 @@ int main(int argc, char* argv[]) { printf("Writing output image to %s\n", str); write_image_to_binary(str, image_out_transform, row_size, col_size); - __visc__cleanup(); + __hpvm__cleanup(); - return 0; + return 0; } - diff --git a/hpvm/test/hpvm-cava/src/pipe_stages.c b/hpvm/test/hpvm-cava/src/pipe_stages.c index 2ebedec936915b5e7f11881c5001c84b6db26474..05bb06697fa8df130aa0d0d324f9bc39bc575fb2 100644 --- a/hpvm/test/hpvm-cava/src/pipe_stages.c +++ b/hpvm/test/hpvm-cava/src/pipe_stages.c @@ -1,172 +1,169 @@ -#include <stdio.h> -#include <math.h> #include "pipe_stages.h" #include "cam_pipe_utility.h" +#include <math.h> +#include <stdio.h> + +// void scale_fxp(uint8_t *input, int row_size, int col_size, float *output) { +void scale_fxp(uint8_t *input, size_t bytes_input, float *output, + size_t bytes_output, int row_size, int col_size) { + __hpvm__hint(DEVICE); + __hpvm__attributes(2, input, output, 1, output); -//void scale_fxp(uint8_t *input, int row_size, int col_size, float *output) { -void scale_fxp(uint8_t *input, size_t bytes_input, - float *output, size_t bytes_output, - int row_size, int col_size) { - __visc__hint(DEVICE); - __visc__attributes(2, input, output, 1, output); - ARRAY_3D(uint8_t, _input, input, row_size, col_size); ARRAY_3D(float, _output, output, row_size, col_size); - sl_chan: +sl_chan: for (int chan = 0; chan < CHAN_SIZE; chan++) - sl_row: + sl_row: for (int row = 0; row < row_size; row++) - sl_col: + sl_col: for (int col = 0; col < col_size; col++) _output[chan][row][col] = _input[chan][row][col] * 1.0 / 255; - __visc__return(1, bytes_output); + __hpvm__return(1, bytes_output); } -//void descale_fxp(float *input, int row_size, int col_size, uint8_t *output) { -void descale_fxp(float *input, size_t bytes_input, - uint8_t *output, size_t bytes_result, - 
int row_size, int col_size) { - __visc__hint(DEVICE); - __visc__attributes(2, input, output, 1, output); - +// void descale_fxp(float *input, int row_size, int col_size, uint8_t *output) { +void descale_fxp(float *input, size_t bytes_input, uint8_t *output, + size_t bytes_result, int row_size, int col_size) { + __hpvm__hint(DEVICE); + __hpvm__attributes(2, input, output, 1, output); + ARRAY_3D(float, _input, input, row_size, col_size); ARRAY_3D(uint8_t, _output, output, row_size, col_size); - dsl_chan: +dsl_chan: for (int chan = 0; chan < CHAN_SIZE; chan++) - dsl_row: + dsl_row: for (int row = 0; row < row_size; row++) - dsl_col: + dsl_col: for (int col = 0; col < col_size; col++) - _output[chan][row][col] = min(max(_input[chan][row][col] * 255, 0), 255); + _output[chan][row][col] = + min(max(_input[chan][row][col] * 255, 0), 255); - __visc__return(1, bytes_output); + __hpvm__return(1, bytes_output); } // Demosaicing stage // G R // B G -//void demosaic_fxp(float *input, int row_size, int col_size, float *result) { -void demosaic_fxp(float *input, size_t bytes_input, - float *result, size_t bytes_result, - int row_size, int col_size) { - __visc__hint(DEVICE); - __visc__attributes(2, input, result, 1, result); - +// void demosaic_fxp(float *input, int row_size, int col_size, float *result) { +void demosaic_fxp(float *input, size_t bytes_input, float *result, + size_t bytes_result, int row_size, int col_size) { + __hpvm__hint(DEVICE); + __hpvm__attributes(2, input, result, 1, result); + printf("Demosaicing.\n"); ARRAY_3D(float, _input, input, row_size, col_size); ARRAY_3D(float, _result, result, row_size, col_size); - dm_row: +dm_row: for (int row = 1; row < row_size - 1; row++) - dm_col: + dm_col: for (int col = 1; col < col_size - 1; col++) - if (row % 2 == 0 && col % 2 == 0) { - // Green pixel - // Getting the R values - float R1 = _input[0][row][col - 1]; - float R2 = _input[0][row][col + 1]; - // Getting the B values - float B1 = _input[2][row - 1][col]; - float 
B2 = _input[2][row + 1][col]; - // R - _result[0][row][col] = (R1 + R2) / 2; - // G - _result[1][row][col] = _input[1][row][col] * 2; - // B - _result[2][row][col] = (B1 + B2) / 2; - } else if (row % 2 == 0 && col % 2 == 1) { - // Red pixel - // Getting the G values - float G1 = _input[1][row - 1][col]; - float G2 = _input[1][row + 1][col]; - float G3 = _input[1][row][col - 1]; - float G4 = _input[1][row][col + 1]; - // Getting the B values - float B1 = _input[2][row - 1][col - 1]; - float B2 = _input[2][row - 1][col + 1]; - float B3 = _input[2][row + 1][col - 1]; - float B4 = _input[2][row + 1][col + 1]; - // R - _result[0][row][col] = _input[0][row][col]; - // G - _result[1][row][col] = (G1 + G2 + G3 + G4) / 2; - // B (center pixel) - _result[2][row][col] = (B1 + B2 + B3 + B4) / 4; - } else if (row % 2 == 1 && col % 2 == 0) { - // Blue pixel - // Getting the R values - float R1 = _input[0][row - 1][col - 1]; - float R2 = _input[0][row + 1][col - 1]; - float R3 = _input[0][row - 1][col + 1]; - float R4 = _input[0][row + 1][col + 1]; - // Getting the G values - float G1 = _input[1][row - 1][col]; - float G2 = _input[1][row + 1][col]; - float G3 = _input[1][row][col - 1]; - float G4 = _input[1][row][col + 1]; - // R - _result[0][row][col] = (R1 + R2 + R3 + R4) / 4; - // G - _result[1][row][col] = (G1 + G2 + G3 + G4) / 2; - // B - _result[2][row][col] = _input[2][row][col]; - } else { - // Bottom Green pixel - // Getting the R values - float R1 = _input[0][row - 1][col]; - float R2 = _input[0][row + 1][col]; - // Getting the B values - float B1 = _input[2][row][col - 1]; - float B2 = _input[2][row][col + 1]; - // R - _result[0][row][col] = (R1 + R2) / 2; - // G - _result[1][row][col] = _input[1][row][col] * 2; - // B - _result[2][row][col] = (B1 + B2) / 2; - } + if (row % 2 == 0 && col % 2 == 0) { + // Green pixel + // Getting the R values + float R1 = _input[0][row][col - 1]; + float R2 = _input[0][row][col + 1]; + // Getting the B values + float B1 = _input[2][row 
- 1][col]; + float B2 = _input[2][row + 1][col]; + // R + _result[0][row][col] = (R1 + R2) / 2; + // G + _result[1][row][col] = _input[1][row][col] * 2; + // B + _result[2][row][col] = (B1 + B2) / 2; + } else if (row % 2 == 0 && col % 2 == 1) { + // Red pixel + // Getting the G values + float G1 = _input[1][row - 1][col]; + float G2 = _input[1][row + 1][col]; + float G3 = _input[1][row][col - 1]; + float G4 = _input[1][row][col + 1]; + // Getting the B values + float B1 = _input[2][row - 1][col - 1]; + float B2 = _input[2][row - 1][col + 1]; + float B3 = _input[2][row + 1][col - 1]; + float B4 = _input[2][row + 1][col + 1]; + // R + _result[0][row][col] = _input[0][row][col]; + // G + _result[1][row][col] = (G1 + G2 + G3 + G4) / 2; + // B (center pixel) + _result[2][row][col] = (B1 + B2 + B3 + B4) / 4; + } else if (row % 2 == 1 && col % 2 == 0) { + // Blue pixel + // Getting the R values + float R1 = _input[0][row - 1][col - 1]; + float R2 = _input[0][row + 1][col - 1]; + float R3 = _input[0][row - 1][col + 1]; + float R4 = _input[0][row + 1][col + 1]; + // Getting the G values + float G1 = _input[1][row - 1][col]; + float G2 = _input[1][row + 1][col]; + float G3 = _input[1][row][col - 1]; + float G4 = _input[1][row][col + 1]; + // R + _result[0][row][col] = (R1 + R2 + R3 + R4) / 4; + // G + _result[1][row][col] = (G1 + G2 + G3 + G4) / 2; + // B + _result[2][row][col] = _input[2][row][col]; + } else { + // Bottom Green pixel + // Getting the R values + float R1 = _input[0][row - 1][col]; + float R2 = _input[0][row + 1][col]; + // Getting the B values + float B1 = _input[2][row][col - 1]; + float B2 = _input[2][row][col + 1]; + // R + _result[0][row][col] = (R1 + R2) / 2; + // G + _result[1][row][col] = _input[1][row][col] * 2; + // B + _result[2][row][col] = (B1 + B2) / 2; + } - __visc__return(1, bytes_result); + __hpvm__return(1, bytes_result); } static void sort(float arr[], int n) { - int i, j; - dn_sort_i: - for (i = 0; i < n - 1; i++) - dn_sort_j: - for (j = 
0; j < n - i - 1; j++) - if (arr[j] > arr[j + 1]) { - float temp = arr[j]; - arr[j] = arr[j + 1]; - arr[j + 1] = temp; - } + int i, j; +dn_sort_i: + for (i = 0; i < n - 1; i++) + dn_sort_j: + for (j = 0; j < n - i - 1; j++) + if (arr[j] > arr[j + 1]) { + float temp = arr[j]; + arr[j] = arr[j + 1]; + arr[j + 1] = temp; + } } // Simple denoise -//void denoise_fxp(float *input, int row_size, int col_size, float *result) { -void denoise_fxp(float *input, size_t bytes_input, - float *result, size_t bytes_result, - int row_size, int col_size) { - __visc__hint(DEVICE); - __visc__attributes(2, input, result, 1, result); - +// void denoise_fxp(float *input, int row_size, int col_size, float *result) { +void denoise_fxp(float *input, size_t bytes_input, float *result, + size_t bytes_result, int row_size, int col_size) { + __hpvm__hint(DEVICE); + __hpvm__attributes(2, input, result, 1, result); + printf("Denoising.\n"); ARRAY_3D(float, _input, input, row_size, col_size); ARRAY_3D(float, _result, result, row_size, col_size); - dn_chan: +dn_chan: for (int chan = 0; chan < CHAN_SIZE; chan++) - dn_row: + dn_row: for (int row = 0; row < row_size; row++) - dn_col: + dn_col: for (int col = 0; col < col_size; col++) if (row >= 1 && row < row_size - 1 && col >= 1 && col < col_size - 1) { float filter[9]; - dn_slide_row: - for (int i = row-1; i < row+2; i++) - dn_slide_col: - for (int j = col-1; j < col+2; j++) { + dn_slide_row: + for (int i = row - 1; i < row + 2; i++) + dn_slide_col: + for (int j = col - 1; j < col + 2; j++) { int index = (i - row + 1) * 3 + j - col + 1; filter[index] = _input[chan][i][j]; } @@ -175,53 +172,52 @@ void denoise_fxp(float *input, size_t bytes_input, } else { _result[chan][row][col] = _input[chan][row][col]; } - __visc__return(1, bytes_result); + __hpvm__return(1, bytes_result); } // Color map and white balance transform -//void transform_fxp(float *input, int row_size, int col_size, float *result, +// void transform_fxp(float *input, int row_size, int 
col_size, float *result, // float *TsTw_tran) { -void transform_fxp(float *input, size_t bytes_input, - float *result, size_t bytes_result, - float *TsTw_tran, size_t bytes_TsTw, +void transform_fxp(float *input, size_t bytes_input, float *result, + size_t bytes_result, float *TsTw_tran, size_t bytes_TsTw, int row_size, int col_size) { - __visc__hint(DEVICE); - __visc__attributes(3, input, result, TsTw_tran, 1, result); - + __hpvm__hint(DEVICE); + __hpvm__attributes(3, input, result, TsTw_tran, 1, result); + printf("Color mapping.\n"); ARRAY_3D(float, _input, input, row_size, col_size); ARRAY_3D(float, _result, result, row_size, col_size); ARRAY_2D(float, _TsTw_tran, TsTw_tran, 3); - tr_chan: +tr_chan: for (int chan = 0; chan < CHAN_SIZE; chan++) - tr_row: + tr_row: for (int row = 0; row < row_size; row++) - tr_col: + tr_col: for (int col = 0; col < col_size; col++) _result[chan][row][col] = max(_input[0][row][col] * _TsTw_tran[0][chan] + _input[1][row][col] * _TsTw_tran[1][chan] + _input[2][row][col] * _TsTw_tran[2][chan], 0); - __visc__return(1, bytes_result); + __hpvm__return(1, bytes_result); } // // Weighted radial basis function for gamut mapping // -//void gamut_map_fxp(float *input, int row_size, int col_size, float *result, -// float *ctrl_pts, float *weights, float *coefs, float *l2_dist) { -void gamut_map_fxp(float *input, size_t bytes_input, - float *result, size_t bytes_result, - float *ctrl_pts, size_t bytes_ctrl_pts, - float *weights, size_t bytes_weights, - float *coefs, size_t bytes_coefs, - float *l2_dist, size_t bytes_l2_dist, +// void gamut_map_fxp(float *input, int row_size, int col_size, float *result, +// float *ctrl_pts, float *weights, float *coefs, float +// *l2_dist) { +void gamut_map_fxp(float *input, size_t bytes_input, float *result, + size_t bytes_result, float *ctrl_pts, size_t bytes_ctrl_pts, + float *weights, size_t bytes_weights, float *coefs, + size_t bytes_coefs, float *l2_dist, size_t bytes_l2_dist, int row_size, int col_size) 
{ - __visc__hint(DEVICE); - __visc__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 1, result); - + __hpvm__hint(DEVICE); + __hpvm__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 1, + result); + printf("Gamut mapping.\n"); ARRAY_3D(float, _input, input, row_size, col_size); ARRAY_3D(float, _result, result, row_size, col_size); @@ -229,26 +225,25 @@ void gamut_map_fxp(float *input, size_t bytes_input, ARRAY_2D(float, _weights, weights, 3); ARRAY_2D(float, _coefs, coefs, 3); - // First, get the L2 norm from every pixel to the control points, - // Then, sum it and weight it. Finally, add the bias. - gm_rbf_row: +// First, get the L2 norm from every pixel to the control points, +// Then, sum it and weight it. Finally, add the bias. +gm_rbf_row: for (int row = 0; row < row_size; row++) - gm_rbf_col: + gm_rbf_col: for (int col = 0; col < col_size; col++) { - gm_rbf_cp0: + gm_rbf_cp0: for (int cp = 0; cp < num_ctrl_pts; cp++) { - l2_dist[cp] = - sqrt((_input[0][row][col] - _ctrl_pts[cp][0]) * - (_input[0][row][col] - _ctrl_pts[cp][0]) + - (_input[1][row][col] - _ctrl_pts[cp][1]) * - (_input[1][row][col] - _ctrl_pts[cp][1]) + - (_input[2][row][col] - _ctrl_pts[cp][2]) * - (_input[2][row][col] - _ctrl_pts[cp][2])); + l2_dist[cp] = sqrt((_input[0][row][col] - _ctrl_pts[cp][0]) * + (_input[0][row][col] - _ctrl_pts[cp][0]) + + (_input[1][row][col] - _ctrl_pts[cp][1]) * + (_input[1][row][col] - _ctrl_pts[cp][1]) + + (_input[2][row][col] - _ctrl_pts[cp][2]) * + (_input[2][row][col] - _ctrl_pts[cp][2])); } - gm_rbf_chan: + gm_rbf_chan: for (int chan = 0; chan < CHAN_SIZE; chan++) { float chan_val = 0.0; - gm_rbf_cp1: + gm_rbf_cp1: for (int cp = 0; cp < num_ctrl_pts; cp++) { chan_val += l2_dist[cp] * _weights[cp][chan]; } @@ -259,32 +254,31 @@ void gamut_map_fxp(float *input, size_t bytes_input, _result[chan][row][col] = max(chan_val, 0); } } - __visc__return(1, bytes_result); + __hpvm__return(1, bytes_result); } // Tone mapping -//void 
tone_map_fxp(float *input, int row_size, int col_size, float *tone_map, +// void tone_map_fxp(float *input, int row_size, int col_size, float *tone_map, // float *result) { -void tone_map_fxp(float *input, size_t bytes_input, - float *result, size_t bytes_result, - float *tone_map, size_t bytes_tone_map, +void tone_map_fxp(float *input, size_t bytes_input, float *result, + size_t bytes_result, float *tone_map, size_t bytes_tone_map, int row_size, int col_size) { - __visc__hint(DEVICE); - __visc__attributes(3, input, result, tone_map, 1, result); - + __hpvm__hint(DEVICE); + __hpvm__attributes(3, input, result, tone_map, 1, result); + printf("Tone mapping.\n"); ARRAY_3D(float, _input, input, row_size, col_size); ARRAY_3D(float, _result, result, row_size, col_size); ARRAY_2D(float, _tone_map, tone_map, 3); - tm_chan: +tm_chan: for (int chan = 0; chan < CHAN_SIZE; chan++) - tm_row: + tm_row: for (int row = 0; row < row_size; row++) - tm_col: + tm_col: for (int col = 0; col < col_size; col++) { uint8_t x = _input[chan][row][col] * 255; _result[chan][row][col] = _tone_map[x][chan]; } - __visc__return(1, bytes_result); + __hpvm__return(1, bytes_result); } diff --git a/hpvm/test/hpvm-cava/src/pipe_stages.h b/hpvm/test/hpvm-cava/src/pipe_stages.h index 8d98cb65cc8af7353cc1faf08988f3b1a6758046..f960822a03326638189c8d294938452ba2670b41 100644 --- a/hpvm/test/hpvm-cava/src/pipe_stages.h +++ b/hpvm/test/hpvm-cava/src/pipe_stages.h @@ -7,54 +7,52 @@ #define ISP 0x4 -#define max(a,b) \ - ({ __typeof__ (a) _a = (a); \ - __typeof__ (b) _b = (b); \ - _a > _b ? _a : _b; }) - -#define min(a,b) \ - ({ __typeof__ (a) _a = (a); \ - __typeof__ (b) _b = (b); \ - _a < _b ? _a : _b; }) - -#define abs(a) \ - ({ __typeof__ (a) _a = (a); \ - _a < 0 ? -_a : _a; }) +#define max(a, b) \ + ({ \ + __typeof__(a) _a = (a); \ + __typeof__(b) _b = (b); \ + _a > _b ? _a : _b; \ + }) + +#define min(a, b) \ + ({ \ + __typeof__(a) _a = (a); \ + __typeof__(b) _b = (b); \ + _a < _b ? 
_a : _b; \ + }) + +#define abs(a) \ + ({ \ + __typeof__(a) _a = (a); \ + _a < 0 ? -_a : _a; \ + }) extern int num_ctrl_pts; -void scale_fxp(uint8_t *input, size_t bytes_input, - float *output, size_t bytes_output, - size_t row_size, size_t col_size); +void scale_fxp(uint8_t *input, size_t bytes_input, float *output, + size_t bytes_output, size_t row_size, size_t col_size); -void descale_fxp(float *input, size_t bytes_input, - uint8_t *output, size_t bytes_result, - size_t row_size, size_t col_size); +void descale_fxp(float *input, size_t bytes_input, uint8_t *output, + size_t bytes_result, size_t row_size, size_t col_size); -void demosaic_fxp(float *input, size_t bytes_input, - float *result, size_t bytes_result, - size_t row_size, size_t col_size); +void demosaic_fxp(float *input, size_t bytes_input, float *result, + size_t bytes_result, size_t row_size, size_t col_size); -void denoise_fxp(float *input, size_t bytes_input, - float *result, size_t bytes_result, - size_t row_size, size_t col_size); +void denoise_fxp(float *input, size_t bytes_input, float *result, + size_t bytes_result, size_t row_size, size_t col_size); -void transform_fxp(float *input, size_t bytes_input, - float *result, size_t bytes_result, - float *TsTw_tran, size_t bytes_TsTw, +void transform_fxp(float *input, size_t bytes_input, float *result, + size_t bytes_result, float *TsTw_tran, size_t bytes_TsTw, size_t row_size, size_t col_size); -void gamut_map_fxp(float *input, size_t bytes_input, - float *result, size_t bytes_result, - float *ctrl_pts, size_t bytes_ctrl_pts, - float *weights, size_t bytes_weights, - float *coefs, size_t bytes_coefs, - float *l2_dist, size_t bytes_l2_dist, +void gamut_map_fxp(float *input, size_t bytes_input, float *result, + size_t bytes_result, float *ctrl_pts, size_t bytes_ctrl_pts, + float *weights, size_t bytes_weights, float *coefs, + size_t bytes_coefs, float *l2_dist, size_t bytes_l2_dist, size_t row_size, size_t col_size); -void tone_map_fxp(float *input, 
size_t bytes_input, - float *result, size_t bytes_result, - float *tone_map, size_t bytes_tone_map, +void tone_map_fxp(float *input, size_t bytes_input, float *result, + size_t bytes_result, float *tone_map, size_t bytes_tone_map, size_t row_size, size_t col_size); void tone_map_approx_fxp(float *input, size_t row_size, size_t col_size, diff --git a/hpvm/test/hpvm-cava/src/utility.c b/hpvm/test/hpvm-cava/src/utility.c index c1eaee3333c2afffdcae827f956efa4e25705352..86bd018183403f637ca8fb7cfb634a09c3ceace8 100644 --- a/hpvm/test/hpvm-cava/src/utility.c +++ b/hpvm/test/hpvm-cava/src/utility.c @@ -1,7 +1,7 @@ -#include <stdlib.h> -#include <assert.h> -#include "defs.h" #include "utility.h" +#include "defs.h" +#include <assert.h> +#include <stdlib.h> void *malloc_aligned(size_t size) { void *ptr = NULL; diff --git a/hpvm/test/include/hpvm.h b/hpvm/test/include/hpvm.h new file mode 100644 index 0000000000000000000000000000000000000000..1e31c98946f00e32d84933fe4bfd443e65cb92a9 --- /dev/null +++ b/hpvm/test/include/hpvm.h @@ -0,0 +1,73 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#ifndef DEVICE +#define DEVICE GPU_TARGET +#endif + +#include "../../include/SupportHPVM/HPVMHint.h" + +#ifndef __cplusplus +#define noexcept +#endif + +#ifdef __cplusplus +extern "C" { +void __hpvm__hint(hpvm::Target) noexcept; +#else +void __hpvm__hint(enum Target) noexcept; +#endif + +void *__hpvm__createNodeND(unsigned, ...) noexcept; +void __hpvm__return(unsigned, ...) noexcept; + +void __hpvm__attributes(unsigned, ...) 
noexcept; +void __hpvm__init() noexcept; +void __hpvm__cleanup() noexcept; + +void __hpvm__bindIn(void *, unsigned, unsigned, unsigned) noexcept; +void __hpvm__bindOut(void *, unsigned, unsigned, unsigned) noexcept; +void *__hpvm__edge(void *, void *, unsigned, unsigned, unsigned, + unsigned) noexcept; + +void __hpvm__push(void *, void *) noexcept; +void *__hpvm__pop(void *) noexcept; +void *__hpvm__launch(unsigned, ...) noexcept; +void __hpvm__wait(void *) noexcept; + +void *__hpvm__getNode() noexcept; +void *__hpvm__getParentNode(void *) noexcept; +void __hpvm__barrier() noexcept; +void *__hpvm__malloc(long) noexcept; +long __hpvm__getNodeInstanceID_x(void *) noexcept; +long __hpvm__getNodeInstanceID_y(void *) noexcept; +long __hpvm__getNodeInstanceID_z(void *) noexcept; +long __hpvm__getNumNodeInstances_x(void *) noexcept; +long __hpvm__getNumNodeInstances_y(void *) noexcept; +long __hpvm__getNumNodeInstances_z(void *) noexcept; + +// Atomic +// signed int +int __hpvm__atomic_add(int *, int) noexcept; +int __hpvm__atomic_sub(int *, int) noexcept; +int __hpvm__atomic_xchg(int *, int) noexcept; +int __hpvm__atomic_inc(int *) noexcept; +int __hpvm__atomic_dec(int *) noexcept; +int __hpvm__atomic_min(int *, int) noexcept; +int __hpvm__atomic_max(int *, int) noexcept; +int __hpvm__atomic_and(int *, int) noexcept; +int __hpvm__atomic_or(int *, int) noexcept; +int __hpvm__atomic_xor(int *, int) noexcept; + +void llvm_hpvm_track_mem(void *, size_t) noexcept; +void llvm_hpvm_untrack_mem(void *) noexcept; +void llvm_hpvm_request_mem(void *, size_t) noexcept; + +#ifdef __cplusplus +} +#endif diff --git a/hpvm/test/include/visc.h b/hpvm/test/include/visc.h deleted file mode 100644 index 18b29500261362be66ea23feecf9a5f85ac68005..0000000000000000000000000000000000000000 --- a/hpvm/test/include/visc.h +++ /dev/null @@ -1,73 +0,0 @@ -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr 
University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -#ifndef DEVICE -#define DEVICE GPU_TARGET -#endif - -#include "../../include/SupportVISC/VISCHint.h" - -#ifndef __cplusplus -#define noexcept -#endif - -#ifdef __cplusplus -extern "C" { -void __visc__hint(visc::Target) noexcept; -#else -void __visc__hint(enum Target) noexcept; -#endif - -void *__visc__createNodeND(unsigned, ...) noexcept; -void __visc__return(unsigned, ...) noexcept; - -void __visc__attributes(unsigned, ...) noexcept; -void __visc__init() noexcept; -void __visc__cleanup() noexcept; - -void __visc__bindIn(void *, unsigned, unsigned, unsigned) noexcept; -void __visc__bindOut(void *, unsigned, unsigned, unsigned) noexcept; -void *__visc__edge(void *, void *, unsigned, unsigned, unsigned, - unsigned) noexcept; - -void __visc__push(void *, void *) noexcept; -void *__visc__pop(void *) noexcept; -void *__visc__launch(unsigned, ...) noexcept; -void __visc__wait(void *) noexcept; - -void *__visc__getNode() noexcept; -void *__visc__getParentNode(void *) noexcept; -void __visc__barrier() noexcept; -void *__visc__malloc(long) noexcept; -long __visc__getNodeInstanceID_x(void *) noexcept; -long __visc__getNodeInstanceID_y(void *) noexcept; -long __visc__getNodeInstanceID_z(void *) noexcept; -long __visc__getNumNodeInstances_x(void *) noexcept; -long __visc__getNumNodeInstances_y(void *) noexcept; -long __visc__getNumNodeInstances_z(void *) noexcept; - -// Atomic -// signed int -int __visc__atomic_add(int *, int) noexcept; -int __visc__atomic_sub(int *, int) noexcept; -int __visc__atomic_xchg(int *, int) noexcept; -int __visc__atomic_inc(int *) noexcept; -int __visc__atomic_dec(int *) noexcept; -int __visc__atomic_min(int *, int) noexcept; -int __visc__atomic_max(int *, int) noexcept; -int __visc__atomic_and(int *, int) noexcept; -int __visc__atomic_or(int *, int) noexcept; -int __visc__atomic_xor(int *, int) noexcept; - -void 
llvm_visc_track_mem(void *, size_t) noexcept; -void llvm_visc_untrack_mem(void *) noexcept; -void llvm_visc_request_mem(void *, size_t) noexcept; - -#ifdef __cplusplus -} -#endif diff --git a/hpvm/test/parboil/README.md b/hpvm/test/parboil/README.md index 1166e4f10f6a6e29e4f5d40871674c27da975acc..853b46ed515455fbcb206630a74d5490c79ffd88 100644 --- a/hpvm/test/parboil/README.md +++ b/hpvm/test/parboil/README.md @@ -2,7 +2,7 @@ | Benchmark | Version | Supported on CPU | Supported on GPU | | :-------- | :------ | :--------------: | :--------------: | -| sgemm | visc | ✔ | ✔ | -| stencil | visc | ✔ | ✔ | -| spmv | visc | ✔ | ✘ | -| lbm | visc | ✔ | ✘ | +| sgemm | hpvm | ✔ | ✔ | +| stencil | hpvm | ✔ | ✔ | +| spmv | hpvm | ✔ | ✘ | +| lbm | hpvm | ✔ | ✘ | diff --git a/hpvm/test/parboil/benchmarks/lbm/Makefile b/hpvm/test/parboil/benchmarks/lbm/Makefile index 4ebf6fc0af2f05cd10f6d556e0b52bee186540d8..af7215ff7039795e2d09ce98af675a851b32b0cb 100644 --- a/hpvm/test/parboil/benchmarks/lbm/Makefile +++ b/hpvm/test/parboil/benchmarks/lbm/Makefile @@ -5,9 +5,9 @@ ifeq ($(NUM_CORES),) NUM_CORES=8 endif -# Default compile visc +# Default compile hpvm ifeq ($(VERSION),) - VERSION = visc + VERSION = hpvm endif # Default use small test case diff --git a/hpvm/test/parboil/benchmarks/lbm/src/visc/Makefile b/hpvm/test/parboil/benchmarks/lbm/src/hpvm/Makefile similarity index 85% rename from hpvm/test/parboil/benchmarks/lbm/src/visc/Makefile rename to hpvm/test/parboil/benchmarks/lbm/src/hpvm/Makefile index d1664ee9880312ccfa2677e6a284851ecadf1f24..5aa206f758e87a94cdaa1cbaadfa3bf9b661d120 100644 --- a/hpvm/test/parboil/benchmarks/lbm/src/visc/Makefile +++ b/hpvm/test/parboil/benchmarks/lbm/src/hpvm/Makefile @@ -1,8 +1,8 @@ # (c) 2010 The Board of Trustees of the University of Illinois. 
-LANGUAGE=visc +LANGUAGE=hpvm SRCDIR_OBJS=lbm.ll -VISC_OBJS=main.visc.ll +HPVM_OBJS=main.hpvm.ll APP_CUDALDFLAGS=-lm APP_CFLAGS=-ffast-math -O3 -DNUM_CORES=$(NUM_CORES) APP_CXXFLAGS=-ffast-math -O3 -DNUM_CORES=$(NUM_CORES) diff --git a/hpvm/test/parboil/benchmarks/lbm/src/visc/layout_config.h b/hpvm/test/parboil/benchmarks/lbm/src/hpvm/layout_config.h similarity index 100% rename from hpvm/test/parboil/benchmarks/lbm/src/visc/layout_config.h rename to hpvm/test/parboil/benchmarks/lbm/src/hpvm/layout_config.h diff --git a/hpvm/test/parboil/benchmarks/lbm/src/visc/lbm.cpp b/hpvm/test/parboil/benchmarks/lbm/src/hpvm/lbm.cpp similarity index 100% rename from hpvm/test/parboil/benchmarks/lbm/src/visc/lbm.cpp rename to hpvm/test/parboil/benchmarks/lbm/src/hpvm/lbm.cpp diff --git a/hpvm/test/parboil/benchmarks/lbm/src/visc/lbm.h b/hpvm/test/parboil/benchmarks/lbm/src/hpvm/lbm.h similarity index 100% rename from hpvm/test/parboil/benchmarks/lbm/src/visc/lbm.h rename to hpvm/test/parboil/benchmarks/lbm/src/hpvm/lbm.h diff --git a/hpvm/test/parboil/benchmarks/lbm/src/visc/lbm_macros.h b/hpvm/test/parboil/benchmarks/lbm/src/hpvm/lbm_macros.h similarity index 100% rename from hpvm/test/parboil/benchmarks/lbm/src/visc/lbm_macros.h rename to hpvm/test/parboil/benchmarks/lbm/src/hpvm/lbm_macros.h diff --git a/hpvm/test/parboil/benchmarks/lbm/src/visc/main.cpp b/hpvm/test/parboil/benchmarks/lbm/src/hpvm/main.cpp similarity index 86% rename from hpvm/test/parboil/benchmarks/lbm/src/visc/main.cpp rename to hpvm/test/parboil/benchmarks/lbm/src/hpvm/main.cpp index bb9f6ed1f03d203e412df679775a83c6ff5c349d..445978c086aaab4c2c45be93da6031bf06da7123 100644 --- a/hpvm/test/parboil/benchmarks/lbm/src/visc/main.cpp +++ b/hpvm/test/parboil/benchmarks/lbm/src/hpvm/main.cpp @@ -12,7 +12,7 @@ #include <stdio.h> #include <stdlib.h> #include <sys/stat.h> -#include <visc.h> +#include <hpvm.h> #include "lbm_macros.h" #include "layout_config.h" @@ -92,18 +92,18 @@ typedef struct 
__attribute__((__packed__)) { void performStreamCollide_kernel(float *srcG, size_t bytes_srcG, float *dstG, size_t bytes_dstG) { - __visc__hint(visc::DEVICE); - __visc__attributes(2, srcG, dstG, 1, dstG); + __hpvm__hint(hpvm::DEVICE); + __hpvm__attributes(2, srcG, dstG, 1, dstG); - void *thisNode = __visc__getNode(); - void *parentNode = __visc__getParentNode(thisNode); + void *thisNode = __hpvm__getNode(); + void *parentNode = __hpvm__getParentNode(thisNode); srcG += MARGIN; dstG += MARGIN; - int lx = __visc__getNodeInstanceID_x(thisNode); - int gx = __visc__getNodeInstanceID_x(parentNode); - int gy = __visc__getNodeInstanceID_y(parentNode); + int lx = __hpvm__getNodeInstanceID_x(thisNode); + int gx = __hpvm__getNodeInstanceID_x(parentNode); + int gy = __hpvm__getNodeInstanceID_y(parentNode); // Using some predefined macros here. Consider this the declaration // and initialization of the variables SWEEP_X, SWEEP_Y and SWEEP_Z @@ -274,40 +274,40 @@ void performStreamCollide_kernel(float *srcG, size_t bytes_srcG, float *dstG, void lbmLvl1(float *srcG, size_t bytes_srcG, float *dstG, size_t bytes_dstG, size_t dim_X1) { - __visc__hint(visc::DEVICE); - __visc__attributes(2, srcG, dstG, 1, dstG); + __hpvm__hint(hpvm::DEVICE); + __hpvm__attributes(2, srcG, dstG, 1, dstG); void *lbm_node = - __visc__createNodeND(2, performStreamCollide_kernel, dim_X1, (size_t)1); - __visc__bindIn(lbm_node, 0, 0, 0); - __visc__bindIn(lbm_node, 1, 1, 0); - __visc__bindIn(lbm_node, 2, 2, 0); - __visc__bindIn(lbm_node, 3, 3, 0); + __hpvm__createNodeND(2, performStreamCollide_kernel, dim_X1, (size_t)1); + __hpvm__bindIn(lbm_node, 0, 0, 0); + __hpvm__bindIn(lbm_node, 1, 1, 0); + __hpvm__bindIn(lbm_node, 2, 2, 0); + __hpvm__bindIn(lbm_node, 3, 3, 0); } void lbmLvl2(float *srcG, size_t bytes_srcG, float *dstG, size_t bytes_dstG, size_t dim_X1, size_t dim_X2, size_t dim_Y2) { - __visc__hint(visc::CPU_TARGET); - __visc__attributes(2, srcG, dstG, 1, dstG); - void *lbm_node = __visc__createNodeND(2, 
lbmLvl1, dim_X2, dim_Y2); - __visc__bindIn(lbm_node, 0, 0, 0); - __visc__bindIn(lbm_node, 1, 1, 0); - __visc__bindIn(lbm_node, 2, 2, 0); - __visc__bindIn(lbm_node, 3, 3, 0); - __visc__bindIn(lbm_node, 4, 4, 0); + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(2, srcG, dstG, 1, dstG); + void *lbm_node = __hpvm__createNodeND(2, lbmLvl1, dim_X2, dim_Y2); + __hpvm__bindIn(lbm_node, 0, 0, 0); + __hpvm__bindIn(lbm_node, 1, 1, 0); + __hpvm__bindIn(lbm_node, 2, 2, 0); + __hpvm__bindIn(lbm_node, 3, 3, 0); + __hpvm__bindIn(lbm_node, 4, 4, 0); } void lbmLvl3(float *srcG, size_t bytes_srcG, float *dstG, size_t bytes_dstG, size_t dim_X1, size_t dim_X2, size_t dim_Y2) { - __visc__hint(visc::CPU_TARGET); - __visc__attributes(2, srcG, dstG, 1, dstG); - void *lbm_node = __visc__createNodeND(0, lbmLvl2); - __visc__bindIn(lbm_node, 0, 0, 0); - __visc__bindIn(lbm_node, 1, 1, 0); - __visc__bindIn(lbm_node, 2, 2, 0); - __visc__bindIn(lbm_node, 3, 3, 0); - __visc__bindIn(lbm_node, 4, 4, 0); - __visc__bindIn(lbm_node, 5, 5, 0); - __visc__bindIn(lbm_node, 6, 6, 0); + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(2, srcG, dstG, 1, dstG); + void *lbm_node = __hpvm__createNodeND(0, lbmLvl2); + __hpvm__bindIn(lbm_node, 0, 0, 0); + __hpvm__bindIn(lbm_node, 1, 1, 0); + __hpvm__bindIn(lbm_node, 2, 2, 0); + __hpvm__bindIn(lbm_node, 3, 3, 0); + __hpvm__bindIn(lbm_node, 4, 4, 0); + __hpvm__bindIn(lbm_node, 5, 5, 0); + __hpvm__bindIn(lbm_node, 6, 6, 0); } __attribute__((noinline)) void MAIN_performStreamCollide(LBM_Grid src, @@ -321,9 +321,9 @@ __attribute__((noinline)) void MAIN_performStreamCollide(LBM_Grid src, RootIn root_in_local = {src - MARGIN, size, dst - MARGIN, size, SIZE_X, SIZE_Y, SIZE_Z}; *(RootIn *)root_in = root_in_local; - void *lbmDFG = __visc__launch(0, lbmLvl3, root_in); + void *lbmDFG = __hpvm__launch(0, lbmLvl3, root_in); - __visc__wait(lbmDFG); + __hpvm__wait(lbmDFG); } void MAIN_initialize(const MAIN_Param *param) { @@ -379,12 +379,12 @@ int main(int nArgs, char 
*arg[]) { MAIN_initialize(&param); pb_InitializeTimerSet(&timers); - __visc__init(); + __hpvm__init(); size_t size = TOTAL_PADDED_CELLS * N_CELL_ENTRIES * sizeof(float); - pb_SwitchToTimer(&timers, visc_TimerID_MEM_TRACK); - llvm_visc_track_mem(srcGrid - MARGIN, size); - llvm_visc_track_mem(dstGrid - MARGIN, size); + pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_TRACK); + llvm_hpvm_track_mem(srcGrid - MARGIN, size); + llvm_hpvm_track_mem(dstGrid - MARGIN, size); pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); for (t = 1; t <= param.nTimeSteps; t++) { @@ -404,15 +404,15 @@ int main(int nArgs, char *arg[]) { } pb_SwitchToTimer(&timers, pb_TimerID_COPY); - llvm_visc_request_mem(srcGrid - MARGIN, size); + llvm_hpvm_request_mem(srcGrid - MARGIN, size); - pb_SwitchToTimer(&timers, visc_TimerID_MEM_UNTRACK); - llvm_visc_untrack_mem(srcGrid - MARGIN); - llvm_visc_untrack_mem(dstGrid - MARGIN); + pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK); + llvm_hpvm_untrack_mem(srcGrid - MARGIN); + llvm_hpvm_untrack_mem(dstGrid - MARGIN); pb_SwitchToTimer(&timers, pb_TimerID_NONE); pb_PrintTimerSet(&timers); - __visc__cleanup(); + __hpvm__cleanup(); /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/ MAIN_finalize(&param); diff --git a/hpvm/test/parboil/benchmarks/lbm/src/visc/main.h b/hpvm/test/parboil/benchmarks/lbm/src/hpvm/main.h similarity index 100% rename from hpvm/test/parboil/benchmarks/lbm/src/visc/main.h rename to hpvm/test/parboil/benchmarks/lbm/src/hpvm/main.h diff --git a/hpvm/test/parboil/benchmarks/sgemm/Makefile b/hpvm/test/parboil/benchmarks/sgemm/Makefile index ace9ded22b6ef365c9cd0f6262245dd2e086643d..4757432d224ea5a1aaa762bfc89c1c89e869bd32 100644 --- a/hpvm/test/parboil/benchmarks/sgemm/Makefile +++ b/hpvm/test/parboil/benchmarks/sgemm/Makefile @@ -1,9 +1,9 @@ PARBOIL_ROOT = $(LLVM_SRC_ROOT)/tools/hpvm/test/parboil APP = sgemm -# Default compile visc +# Default compile hpvm ifeq ($(VERSION),) - VERSION = visc_sh + VERSION = hpvm_sh endif # Default use small test case 
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/visc/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm/Makefile similarity index 83% rename from hpvm/test/parboil/benchmarks/sgemm/src/visc/Makefile rename to hpvm/test/parboil/benchmarks/sgemm/src/hpvm/Makefile index d1f6c96d0c279bc2f2e3e70313369d49881b62b8..6e63f8384190ff75c281592df1ab3843b017d07f 100644 --- a/hpvm/test/parboil/benchmarks/sgemm/src/visc/Makefile +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm/Makefile @@ -1,8 +1,8 @@ # (c) 2010 The Board of Trustees of the University of Illinois. -LANGUAGE=visc +LANGUAGE=hpvm SRCDIR_OBJS=io.ll #compute_gold.o -VISC_OBJS=main.visc.ll +HPVM_OBJS=main.hpvm.ll APP_CUDALDFLAGS=-lm -lstdc++ APP_CFLAGS=-ffast-math -O1 APP_CXXFLAGS=-ffast-math -O1 diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/visc/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm/io.cc similarity index 100% rename from hpvm/test/parboil/benchmarks/sgemm/src/visc/io.cc rename to hpvm/test/parboil/benchmarks/sgemm/src/hpvm/io.cc diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/visc/kernel.cl b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm/kernel.cl similarity index 100% rename from hpvm/test/parboil/benchmarks/sgemm/src/visc/kernel.cl rename to hpvm/test/parboil/benchmarks/sgemm/src/hpvm/kernel.cl diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm/main.cc similarity index 69% rename from hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc rename to hpvm/test/parboil/benchmarks/sgemm/src/hpvm/main.cc index 627f5a82412374cff4a9061620ce1f27ea3c14a6..de36705707d7062b4cef2042197902c2c415e312 100644 --- a/hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm/main.cc @@ -10,6 +10,7 @@ * Main entry of dense matrix-matrix multiplication kernel */ +#include <hpvm.h> #include <iostream> #include <malloc.h> #include <math.h> @@ -19,7 +20,6 @@ #include <string.h> #include <sys/time.h> #include 
<vector> -#include <visc.h> // I/O routines extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, @@ -57,17 +57,17 @@ typedef struct __attribute__((__packed__)) { void mysgemmNT(float *A, size_t bytes_A, int lda, float *B, size_t bytes_B, int ldb, float *C, size_t bytes_C, int ldc, int k, float alpha, float beta) { - __visc__hint(visc::DEVICE); - __visc__attributes(3, A, B, C, 1, C); - - void *thisNode = __visc__getNode(); - void *parentNode = __visc__getParentNode(thisNode); - int lx = __visc__getNodeInstanceID_x(thisNode); - int ly = __visc__getNodeInstanceID_y(thisNode); - int gx = __visc__getNodeInstanceID_x(parentNode); - int gy = __visc__getNodeInstanceID_y(parentNode); - int gridx = __visc__getNumNodeInstances_x(thisNode); - int gridy = __visc__getNumNodeInstances_y(thisNode); + __hpvm__hint(hpvm::DEVICE); + __hpvm__attributes(3, A, B, C, 1, C); + + void *thisNode = __hpvm__getNode(); + void *parentNode = __hpvm__getParentNode(thisNode); + int lx = __hpvm__getNodeInstanceID_x(thisNode); + int ly = __hpvm__getNodeInstanceID_y(thisNode); + int gx = __hpvm__getNodeInstanceID_x(parentNode); + int gy = __hpvm__getNodeInstanceID_y(parentNode); + int gridx = __hpvm__getNumNodeInstances_x(thisNode); + int gridy = __hpvm__getNumNodeInstances_y(thisNode); int m = gx * gridx + lx; int n = gy * gridy + ly; @@ -83,46 +83,46 @@ void mysgemmNT(float *A, size_t bytes_A, int lda, float *B, size_t bytes_B, void basicSgemmLvl1(float *A, size_t bytes_A, int lda, float *B, size_t bytes_B, int ldb, float *C, size_t bytes_C, int ldc, int k, float alpha, float beta, size_t dim_X1, size_t dim_Y1) { - __visc__hint(visc::DEVICE); - __visc__attributes(3, A, B, C, 1, C); + __hpvm__hint(hpvm::DEVICE); + __hpvm__attributes(3, A, B, C, 1, C); void *sgemm_node = - __visc__createNodeND(2, mysgemmNT, (size_t)dim_X1, (size_t)dim_Y1); - __visc__bindIn(sgemm_node, 0, 0, 0); - __visc__bindIn(sgemm_node, 1, 1, 0); - __visc__bindIn(sgemm_node, 2, 2, 0); - 
__visc__bindIn(sgemm_node, 3, 3, 0); - __visc__bindIn(sgemm_node, 4, 4, 0); - __visc__bindIn(sgemm_node, 5, 5, 0); - __visc__bindIn(sgemm_node, 6, 6, 0); - __visc__bindIn(sgemm_node, 7, 7, 0); - __visc__bindIn(sgemm_node, 8, 8, 0); - __visc__bindIn(sgemm_node, 9, 9, 0); - __visc__bindIn(sgemm_node, 10, 10, 0); - __visc__bindIn(sgemm_node, 11, 11, 0); + __hpvm__createNodeND(2, mysgemmNT, (size_t)dim_X1, (size_t)dim_Y1); + __hpvm__bindIn(sgemm_node, 0, 0, 0); + __hpvm__bindIn(sgemm_node, 1, 1, 0); + __hpvm__bindIn(sgemm_node, 2, 2, 0); + __hpvm__bindIn(sgemm_node, 3, 3, 0); + __hpvm__bindIn(sgemm_node, 4, 4, 0); + __hpvm__bindIn(sgemm_node, 5, 5, 0); + __hpvm__bindIn(sgemm_node, 6, 6, 0); + __hpvm__bindIn(sgemm_node, 7, 7, 0); + __hpvm__bindIn(sgemm_node, 8, 8, 0); + __hpvm__bindIn(sgemm_node, 9, 9, 0); + __hpvm__bindIn(sgemm_node, 10, 10, 0); + __hpvm__bindIn(sgemm_node, 11, 11, 0); } void basicSgemmLvl2(float *A, size_t bytes_A, int lda, float *B, size_t bytes_B, int ldb, float *C, size_t bytes_C, int ldc, int k, float alpha, float beta, size_t dim_X1, size_t dim_Y1, size_t dim_X2, size_t dim_Y2) { - __visc__hint(visc::CPU_TARGET); - __visc__attributes(3, A, B, C, 1, C); + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(3, A, B, C, 1, C); void *sgemm_node = - __visc__createNodeND(2, basicSgemmLvl1, (size_t)dim_X2, (size_t)dim_Y2); - __visc__bindIn(sgemm_node, 0, 0, 0); - __visc__bindIn(sgemm_node, 1, 1, 0); - __visc__bindIn(sgemm_node, 2, 2, 0); - __visc__bindIn(sgemm_node, 3, 3, 0); - __visc__bindIn(sgemm_node, 4, 4, 0); - __visc__bindIn(sgemm_node, 5, 5, 0); - __visc__bindIn(sgemm_node, 6, 6, 0); - __visc__bindIn(sgemm_node, 7, 7, 0); - __visc__bindIn(sgemm_node, 8, 8, 0); - __visc__bindIn(sgemm_node, 9, 9, 0); - __visc__bindIn(sgemm_node, 10, 10, 0); - __visc__bindIn(sgemm_node, 11, 11, 0); - __visc__bindIn(sgemm_node, 12, 12, 0); - __visc__bindIn(sgemm_node, 13, 13, 0); + __hpvm__createNodeND(2, basicSgemmLvl1, (size_t)dim_X2, (size_t)dim_Y2); + 
__hpvm__bindIn(sgemm_node, 0, 0, 0); + __hpvm__bindIn(sgemm_node, 1, 1, 0); + __hpvm__bindIn(sgemm_node, 2, 2, 0); + __hpvm__bindIn(sgemm_node, 3, 3, 0); + __hpvm__bindIn(sgemm_node, 4, 4, 0); + __hpvm__bindIn(sgemm_node, 5, 5, 0); + __hpvm__bindIn(sgemm_node, 6, 6, 0); + __hpvm__bindIn(sgemm_node, 7, 7, 0); + __hpvm__bindIn(sgemm_node, 8, 8, 0); + __hpvm__bindIn(sgemm_node, 9, 9, 0); + __hpvm__bindIn(sgemm_node, 10, 10, 0); + __hpvm__bindIn(sgemm_node, 11, 11, 0); + __hpvm__bindIn(sgemm_node, 12, 12, 0); + __hpvm__bindIn(sgemm_node, 13, 13, 0); } // A wrapper level used in codegen for some backends @@ -130,25 +130,25 @@ void basicSgemmLvl3(float *A, size_t bytes_A, int lda, float *B, size_t bytes_B, int ldb, float *C, size_t bytes_C, int ldc, int k, float alpha, float beta, size_t dim_X1, size_t dim_Y1, size_t dim_X2, size_t dim_Y2) { - __visc__hint(visc::CPU_TARGET); - __visc__attributes(3, A, B, C, 1, C); - void *sgemm_node = __visc__createNodeND(0, basicSgemmLvl2); - __visc__bindIn(sgemm_node, 0, 0, 0); - __visc__bindIn(sgemm_node, 1, 1, 0); - __visc__bindIn(sgemm_node, 2, 2, 0); - __visc__bindIn(sgemm_node, 3, 3, 0); - __visc__bindIn(sgemm_node, 4, 4, 0); - __visc__bindIn(sgemm_node, 5, 5, 0); - __visc__bindIn(sgemm_node, 6, 6, 0); - __visc__bindIn(sgemm_node, 7, 7, 0); - __visc__bindIn(sgemm_node, 8, 8, 0); - __visc__bindIn(sgemm_node, 9, 9, 0); - __visc__bindIn(sgemm_node, 10, 10, 0); - __visc__bindIn(sgemm_node, 11, 11, 0); - __visc__bindIn(sgemm_node, 12, 12, 0); - __visc__bindIn(sgemm_node, 13, 13, 0); - __visc__bindIn(sgemm_node, 14, 14, 0); - __visc__bindIn(sgemm_node, 15, 15, 0); + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(3, A, B, C, 1, C); + void *sgemm_node = __hpvm__createNodeND(0, basicSgemmLvl2); + __hpvm__bindIn(sgemm_node, 0, 0, 0); + __hpvm__bindIn(sgemm_node, 1, 1, 0); + __hpvm__bindIn(sgemm_node, 2, 2, 0); + __hpvm__bindIn(sgemm_node, 3, 3, 0); + __hpvm__bindIn(sgemm_node, 4, 4, 0); + __hpvm__bindIn(sgemm_node, 5, 5, 0); + 
__hpvm__bindIn(sgemm_node, 6, 6, 0); + __hpvm__bindIn(sgemm_node, 7, 7, 0); + __hpvm__bindIn(sgemm_node, 8, 8, 0); + __hpvm__bindIn(sgemm_node, 9, 9, 0); + __hpvm__bindIn(sgemm_node, 10, 10, 0); + __hpvm__bindIn(sgemm_node, 11, 11, 0); + __hpvm__bindIn(sgemm_node, 12, 12, 0); + __hpvm__bindIn(sgemm_node, 13, 13, 0); + __hpvm__bindIn(sgemm_node, 14, 14, 0); + __hpvm__bindIn(sgemm_node, 15, 15, 0); } __attribute__((noinline)) void basicSgemm(char transa, char transb, int m, @@ -194,8 +194,8 @@ __attribute__((noinline)) void basicSgemm(char transa, char transb, int m, dg[0] / db[0], dg[1] / db[1]}; *(RootIn *)root_in = root_in_local; - void *sgemmDFG = __visc__launch(0, basicSgemmLvl3, root_in); - __visc__wait(sgemmDFG); + void *sgemmDFG = __hpvm__launch(0, basicSgemmLvl3, root_in); + __hpvm__wait(sgemmDFG); } int main(int argc, char *argv[]) { @@ -233,7 +233,7 @@ int main(int argc, char *argv[]) { readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT); pb_InitializeTimerSet(&timers); - __visc__init(); + __hpvm__init(); pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); // copy A to device memory @@ -246,9 +246,9 @@ int main(int argc, char *argv[]) { // OpenCL memory allocation std::vector<float> matC(matArow * matBcol); - llvm_visc_track_mem(&matA.front(), A_sz); - llvm_visc_track_mem(&matBT.front(), B_sz); - llvm_visc_track_mem(&matC.front(), C_sz); + llvm_hpvm_track_mem(&matA.front(), A_sz); + llvm_hpvm_track_mem(&matBT.front(), B_sz); + llvm_hpvm_track_mem(&matC.front(), C_sz); // Copy A and B^T into device memory pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); @@ -263,16 +263,16 @@ int main(int argc, char *argv[]) { matArow); pb_SwitchToTimer(&timers, pb_TimerID_COPY); - llvm_visc_request_mem(&matC.front(), C_sz); + llvm_hpvm_request_mem(&matC.front(), C_sz); - pb_SwitchToTimer(&timers, visc_TimerID_MEM_UNTRACK); - llvm_visc_untrack_mem(&matA.front()); - llvm_visc_untrack_mem(&matBT.front()); - llvm_visc_untrack_mem(&matC.front()); + 
pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK); + llvm_hpvm_untrack_mem(&matA.front()); + llvm_hpvm_untrack_mem(&matBT.front()); + llvm_hpvm_untrack_mem(&matC.front()); pb_SwitchToTimer(&timers, pb_TimerID_NONE); pb_PrintTimerSet(&timers); - __visc__cleanup(); + __hpvm__cleanup(); if (params->outFile) { diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..2234bf54e1e665f95b38dd0e25c2fe1b5539ce4e --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/Makefile @@ -0,0 +1,8 @@ +# (c) 2010 The Board of Trustees of the University of Illinois. + +LANGUAGE=hpvm +SRCDIR_OBJS=io.ll #compute_gold.o +HPVM_OBJS=main.hpvm.ll +APP_CUDALDFLAGS=-lm -lstdc++ +APP_CFLAGS=-ffast-math -O3 +APP_CXXFLAGS=-ffast-math -O3 diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/io.cc new file mode 100644 index 0000000000000000000000000000000000000000..04744f404ebaf6e669c2bbe91600519742b57dc9 --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/io.cc @@ -0,0 +1,84 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* I/O routines for reading and writing matrices in column-major + * layout + */ + +#include <fstream> +#include <iostream> +#include <vector> + +char *readFile(const char *fileName) { + std::fstream f(fileName, std::fstream::in); + if (!f.good()) { + std::cerr << "Error Reading File!!" 
<< std::endl; + return NULL; + } + + f.seekg(0, std::ios::end); + int length = f.tellg(); + f.seekg(0, std::ios::beg); + + char *buffer; + + if (length > 0) { + buffer = new char[length]; + f.read(buffer, length); + buffer[length - 1] = 0; + } else { + buffer = new char; + buffer[0] = 0; + } + + f.close(); + + return buffer; +} + +bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, + std::vector<float> &v) { + std::cerr << "Opening file:" << fn << std::endl; + std::fstream f(fn, std::fstream::in); + if (!f.good()) { + return false; + } + + // Read # of rows and cols + f >> nr_row; + f >> nr_col; + + float data; + std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; + while (f.good()) { + f >> data; + v.push_back(data); + } + v.pop_back(); // remove the duplicated last element + return true; +} + +bool writeColMajorMatrixFile(const char *fn, int nr_row, int nr_col, + std::vector<float> &v) { + std::cerr << "Opening file:" << fn << " for write." << std::endl; + std::fstream f(fn, std::fstream::out); + if (!f.good()) { + return false; + } + + // Read # of rows and cols + f << nr_row << " " << nr_col << " "; + + float data; + std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; + for (int i = 0; i < v.size(); ++i) { + f << v[i] << ' '; + } + f << "\n"; + return true; +} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/kernel.cl b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/kernel.cl new file mode 100644 index 0000000000000000000000000000000000000000..ae0f5b60f4b800515bd84a04b02926acd625665c --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/kernel.cl @@ -0,0 +1,40 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* + * Kernel of dense 
matrix-matrix multiplication kernel. + */ + +__kernel void mysgemmNT( __global const float *A, int lda, __global const float *B, int ldb, __global float* C, int ldc, int k, float alpha, float beta ) +{ + // Partial results + float c[TILE_N]; + for (int i=0; i < TILE_N; i++) + c[i] = 0.0f; + + int mid = get_local_id(1)*get_local_size(0)+get_local_id(0); + int m = get_group_id(0) * TILE_M + mid; + + int b_base = 0; + + for (int i = 0; i < k; i+=TILE_TB_HEIGHT) { + float a; + b_base = get_group_id(1) * TILE_N + i * ldb; + + for (int j = 0; j < TILE_TB_HEIGHT; j++) { + a = A[m + (i+j)*lda]; + for (int kk = 0; kk < TILE_N; kk++) + c[kk] += a * B[b_base + j * ldb + kk]; + + } + } + int t = ldc * get_group_id(1) * TILE_N + m; + for (int i = 0; i < TILE_N; i++) { + C[t+i*ldc] = C[t+i*ldc] * beta + alpha * c[i]; + } +} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/main.cc new file mode 100644 index 0000000000000000000000000000000000000000..a1db2e56a5c5639319d7be5f6a890d44c3a28421 --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/main.cc @@ -0,0 +1,186 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* + * Main entry of dense matrix-matrix multiplication kernel + */ + +#include <hpvm.h> +#include <iostream> +#include <malloc.h> +#include <math.h> +#include <parboil.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <vector> + +// I/O routines +extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, + std::vector<float> &v); +extern bool writeColMajorMatrixFile(const char *fn, int, int, + std::vector<float> &); +extern char *readFile(const char *); + +// Parameters of tile sizes +#define 
TILE_N 16 +#define TILE_TB_HEIGHT 8 +#define TILE_M (TILE_N * TILE_TB_HEIGHT) + +#define CHECK_ERROR(errorMessage) \ + if (clStatus != CL_SUCCESS) { \ + std::cout << errorMessage << " Error!\n"; \ + std::cout << "Line: " << __LINE__ << "\n"; \ + exit(1); \ + } + +void mysgemmNT(float *A, int lda, float *B, int ldb, float *C, int ldc, int k, + float alpha, float beta) { + __hpvm__hint(hpvm::GPU_TARGET); + __hpvm__attributes(3, A, B, C, 1, C); + + float c[TILE_N]; + for (int i = 0; i < TILE_N; i++) + c[i] = 0.0f; + + int mid = get_local_id(1) * get_local_size(0) + get_local_id(0); + int m = get_group_id(0) * TILE_M + mid; + + int b_base = 0; + + for (int i = 0; i < k; i += TILE_TB_HEIGHT) { + float a; + b_base = get_group_id(1) * TILE_N + i * ldb; + + for (int j = 0; j < TILE_TB_HEIGHT; j++) { + a = A[m + (i + j) * lda]; + for (int kk = 0; kk < TILE_N; kk++) + c[kk] += a * B[b_base + j * ldb + kk]; + } + } + int t = ldc * get_group_id(1) * TILE_N + m; + for (int i = 0; i < TILE_N; i++) { + C[t + i * ldc] = C[t + i * ldc] * beta + alpha * c[i]; + } +} + +__attribute__((noinline)) void basicSgemm(char transa, char transb, int m, + int n, int k, float alpha, float *A, + size_t bytesA, int lda, float *B, + size_t bytesB, int ldb, float beta, + float *C, size_t bytesC, int ldc) { + if ((transa != 'N') && (transa != 'n')) { + std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl; + return; + } + + if ((transb != 'T') && (transb != 't')) { + std::cerr << "unsupported value of 'transb' in regtileSgemm()" << std::endl; + return; + } + + // In this code we assume the matrix sizes are multiple of tile size + if ((m % TILE_M) || (n % TILE_N)) { + std::cerr << "unsupported size of matrix. 
m should be multiple of " + << TILE_M << "; n should be multiple of " << TILE_N << std::endl; + return; + } + + unsigned db[2] = {TILE_N, TILE_TB_HEIGHT}; + // unsigned dg[2] = {m*TILE_N/TILE_M,n*TILE_TB_HEIGHT/TILE_N}; + unsigned dg[2] = {m * db[0] / TILE_M, n * db[1] / TILE_N}; + + unsigned sgemmDFG = __hpvm__node(mysgemmNT, 2, 2, db[0], db[1], dg[0] / db[0], + dg[1] / db[1], 12, A, bytesA, lda, B, bytesB, + ldb, C, bytesC, ldc, k, alpha, beta, 0); + __hpvm__wait(sgemmDFG); +} + +int main(int argc, char *argv[]) { + + struct pb_Parameters *params; + struct pb_TimerSet timers; + + size_t A_sz, B_sz, C_sz; + int matArow, matAcol; + int matBrow, matBcol; + std::vector<float> matA, matBT; + + /* Read command line. Expect 3 inputs: A, B and B^T + in column-major layout*/ + params = pb_ReadParameters(&argc, argv); + if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) || + (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) { + fprintf(stderr, "Expecting three input filenames\n"); + exit(-1); + } + + /* Read in data */ + // load A + readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA); + + // load B^T + readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT); + + pb_InitializeTimerSet(&timers); + __hpvm__init(); + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + // copy A to device memory + A_sz = matArow * matAcol * sizeof(float); + B_sz = matBrow * matBcol * sizeof(float); + + // allocate space for C + C_sz = matArow * matBcol * sizeof(float); + + // OpenCL memory allocation + std::vector<float> matC(matArow * matBcol); + + llvm_hpvm_track_mem(&matA.front(), A_sz); + llvm_hpvm_track_mem(&matBT.front(), B_sz); + llvm_hpvm_track_mem(&matC.front(), C_sz); + // Copy A and B^T into device memory + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + for (size_t i = 0; i < matC.size(); i++) + matC[i] = 0.0f; + + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + + // Use standard sgemm interface + basicSgemm('N', 'T', 
matArow, matBcol, matAcol, 1.0f, &matA.front(), A_sz, + matArow, &matBT.front(), B_sz, matBcol, 0.0f, &matC.front(), C_sz, + matArow); + + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + llvm_hpvm_request_mem(&matC.front(), C_sz); + + pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK); + llvm_hpvm_untrack_mem(&matA.front()); + llvm_hpvm_untrack_mem(&matBT.front()); + llvm_hpvm_untrack_mem(&matC.front()); + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + + pb_PrintTimerSet(&timers); + __hpvm__cleanup(); + + if (params->outFile) { + + /* Write C to file */ + // pb_SwitchToTimer(&timers, pb_TimerID_IO); + writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC); + } + + double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL])); + std::cout << "GFLOPs = " << 2. * matArow * matBcol * matAcol / GPUtime / 1e9 + << std::endl; + pb_FreeParameters(params); + + return 0; +} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..f81bac47072bc017dcdcdccf373cdfbd0f21ceac --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/Makefile @@ -0,0 +1,9 @@ +# (c) 2010 The Board of Trustees of the University of Illinois. 
+ +LANGUAGE=hpvm +SRCDIR_OBJS=io.ll #compute_gold.o +HPVM_OBJS=main.hpvm.ll +APP_CUDALDFLAGS=-lm -lstdc++ +APP_CFLAGS=-ffast-math -O3 +APP_CXXFLAGS=-ffast-math -O3 +APP_OPTFLAGS=-unroll-threshold=300 -loop-unroll -sroa diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/io.cc new file mode 100644 index 0000000000000000000000000000000000000000..04744f404ebaf6e669c2bbe91600519742b57dc9 --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/io.cc @@ -0,0 +1,84 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* I/O routines for reading and writing matrices in column-major + * layout + */ + +#include <fstream> +#include <iostream> +#include <vector> + +char *readFile(const char *fileName) { + std::fstream f(fileName, std::fstream::in); + if (!f.good()) { + std::cerr << "Error Reading File!!" 
<< std::endl; + return NULL; + } + + f.seekg(0, std::ios::end); + int length = f.tellg(); + f.seekg(0, std::ios::beg); + + char *buffer; + + if (length > 0) { + buffer = new char[length]; + f.read(buffer, length); + buffer[length - 1] = 0; + } else { + buffer = new char; + buffer[0] = 0; + } + + f.close(); + + return buffer; +} + +bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, + std::vector<float> &v) { + std::cerr << "Opening file:" << fn << std::endl; + std::fstream f(fn, std::fstream::in); + if (!f.good()) { + return false; + } + + // Read # of rows and cols + f >> nr_row; + f >> nr_col; + + float data; + std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; + while (f.good()) { + f >> data; + v.push_back(data); + } + v.pop_back(); // remove the duplicated last element + return true; +} + +bool writeColMajorMatrixFile(const char *fn, int nr_row, int nr_col, + std::vector<float> &v) { + std::cerr << "Opening file:" << fn << " for write." << std::endl; + std::fstream f(fn, std::fstream::out); + if (!f.good()) { + return false; + } + + // Read # of rows and cols + f << nr_row << " " << nr_col << " "; + + float data; + std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; + for (int i = 0; i < v.size(); ++i) { + f << v[i] << ' '; + } + f << "\n"; + return true; +} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/main.cc new file mode 100644 index 0000000000000000000000000000000000000000..de0d473ed6fe6724ef81f99b13e02d0de29b103b --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/main.cc @@ -0,0 +1,350 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* + * Main entry of dense matrix-matrix 
multiplication kernel + */ + +#include <hpvm.h> +#include <iostream> +#include <malloc.h> +#include <math.h> +#include <parboil.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <vector> + +// I/O routines +extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, + std::vector<float> &v); +extern bool writeColMajorMatrixFile(const char *fn, int, int, + std::vector<float> &); +extern char *readFile(const char *); + +// Parameters of tile sizes +#define TILE_N 16 +#define TILE_TB_HEIGHT 8 +#define TILE_M (TILE_N * TILE_TB_HEIGHT) + +#define CHECK_ERROR(errorMessage) \ + if (clStatus != CL_SUCCESS) { \ + std::cout << errorMessage << " Error!\n"; \ + std::cout << "Line: " << __LINE__ << "\n"; \ + exit(1); \ + } + +typedef struct __attribute__((__packed__)) { + float *A; + size_t bytesA; + int lda; + float *B; + size_t bytesB; + int ldb; + float *C; + size_t bytesC; + int ldc; + int k; + float alpha; + float beta; + long block_x; + long block_y; + long grid_x; + long grid_y; +} RootIn; + +void packData(RootIn *args, float *A, size_t bytesA, int lda, float *B, + size_t bytesB, int ldb, float *C, size_t bytesC, int ldc, int k, + float alpha, float beta, long block_x, long block_y, long grid_x, + long grid_y) { + args->A = A; + args->bytesA = bytesA; + args->lda = lda; + args->B = B; + args->bytesB = bytesB; + args->ldb = ldb; + args->C = C; + args->bytesC = bytesC; + args->ldc = ldc; + args->k = k; + args->alpha = alpha; + args->beta = beta; + args->block_x = block_x; + args->block_y = block_y; + args->grid_x = grid_x; + args->grid_y = grid_y; +} + +void Allocation(long block_x, long block_y) { + void *shB = __hpvm__malloc(block_x * block_y * sizeof(float)); + __hpvm__return(2, shB, block_x * block_y * sizeof(float)); +} + +void SgemmLeaf(float *A, size_t bytesA, int lda, float *B, size_t bytesB, + int ldb, float *C, size_t bytesC, int ldc, int k, float alpha, + float beta, float *shB, size_t bytesshB) { 
+ __hpvm__hint(hpvm::DEVICE); + //__hpvm__hint(hpvm::SPIR_TARGET); + //__hpvm__hint(hpvm::GPU_TARGET); + + __hpvm__attributes(3, A, B, C, 1, C); + + void *thisNode = __hpvm__getNode(); + void *parentNode = __hpvm__getParentNode(thisNode); + + long lx = __hpvm__getNodeInstanceID_x(thisNode); + long ly = __hpvm__getNodeInstanceID_y(thisNode); + + long gx = __hpvm__getNodeInstanceID_x(parentNode); + long gy = __hpvm__getNodeInstanceID_y(parentNode); + + long dimx = __hpvm__getNumNodeInstances_x(thisNode); + + float c[TILE_N]; + for (int i = 0; i < TILE_N; i++) + c[i] = 0.0f; + + int mid = ly * dimx + lx; + int m = gx * TILE_M + mid; + int n = gy * TILE_N + lx; + + for (int i = 0; i < k; i += TILE_TB_HEIGHT) { + float a; + // shB[ly][lx] = B[n+(i+ly)*ldb]; + shB[ly * dimx + lx] = B[n + (i + ly) * ldb]; + + __hpvm__barrier(); + for (int j = 0; j < TILE_TB_HEIGHT; j++) { + a = A[m + (i + j) * lda]; + for (int kk = 0; kk < TILE_N; kk++) { + // c[kk] += a * shB[j][kk]; + c[kk] += a * shB[j * dimx + kk]; + } + } + __hpvm__barrier(); + } + + int t = ldc * gy * TILE_N + m; + for (int i = 0; i < TILE_N; i++) { + C[t + i * ldc] = C[t + i * ldc] * beta + alpha * c[i]; + } +} + +// Work group node for sgemm - Creates allocation node and leaf (work item) node +void SgemmTB(float *A, size_t bytesA, int lda, float *B, size_t bytesB, int ldb, + float *C, size_t bytesC, int ldc, int k, float alpha, float beta, + long block_x, long block_y) { + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(3, A, B, C, 1, C); + void *AllocationNode = __hpvm__createNodeND(0, Allocation); + void *SgemmLeafNode = __hpvm__createNodeND(2, SgemmLeaf, block_x, block_y); + + // Bind edges + __hpvm__bindIn(SgemmLeafNode, 0, 0, 0); // Bind A + __hpvm__bindIn(SgemmLeafNode, 1, 1, 0); // Bind bytesA + __hpvm__bindIn(SgemmLeafNode, 2, 2, 0); // Bind lda + __hpvm__bindIn(SgemmLeafNode, 3, 3, 0); // Bind B + __hpvm__bindIn(SgemmLeafNode, 4, 4, 0); // Bind bytesB + __hpvm__bindIn(SgemmLeafNode, 5, 5, 0); // Bind 
ldb + __hpvm__bindIn(SgemmLeafNode, 6, 6, 0); // Bind C + __hpvm__bindIn(SgemmLeafNode, 7, 7, 0); // Bind bytesC + __hpvm__bindIn(SgemmLeafNode, 8, 8, 0); // Bind ldc + __hpvm__bindIn(SgemmLeafNode, 9, 9, 0); // Bind k + __hpvm__bindIn(SgemmLeafNode, 10, 10, 0); // Bind alpha + __hpvm__bindIn(SgemmLeafNode, 11, 11, 0); // Bind beta + + __hpvm__bindIn(AllocationNode, 12, 0, 0); // Bind block_x + __hpvm__bindIn(AllocationNode, 13, 1, 0); // Bind block_y + + // Create Edges between AllocationNode and BFSLeafNodeNode + __hpvm__edge(AllocationNode, SgemmLeafNode, 1, 0, 12, 0); // Edge local_B + __hpvm__edge(AllocationNode, SgemmLeafNode, 1, 1, 13, + 0); // Edge bytes_local_B +} + +// Root node for sgemm - Creates work group node +void SgemmRoot(float *A, size_t bytesA, int lda, // 0-2 + float *B, size_t bytesB, int ldb, // 3-5 + float *C, size_t bytesC, int ldc, // 6-8 + int k, float alpha, float beta, // 9-11 + long block_x, long block_y, long grid_x, long grid_y // 12-15 +) { + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(3, A, B, C, 1, C); + void *SgemmTBNode = __hpvm__createNodeND(2, SgemmTB, grid_x, grid_y); + + // Bind edges + __hpvm__bindIn(SgemmTBNode, 0, 0, 0); // Bind A + __hpvm__bindIn(SgemmTBNode, 1, 1, 0); // Bind bytesA + __hpvm__bindIn(SgemmTBNode, 2, 2, 0); // Bind lda + __hpvm__bindIn(SgemmTBNode, 3, 3, 0); // Bind B + __hpvm__bindIn(SgemmTBNode, 4, 4, 0); // Bind bytesB + __hpvm__bindIn(SgemmTBNode, 5, 5, 0); // Bind ldb + __hpvm__bindIn(SgemmTBNode, 6, 6, 0); // Bind C + __hpvm__bindIn(SgemmTBNode, 7, 7, 0); // Bind bytesC + __hpvm__bindIn(SgemmTBNode, 8, 8, 0); // Bind ldc + __hpvm__bindIn(SgemmTBNode, 9, 9, 0); // Bind k + __hpvm__bindIn(SgemmTBNode, 10, 10, 0); // Bind alpha + __hpvm__bindIn(SgemmTBNode, 11, 11, 0); // Bind beta + __hpvm__bindIn(SgemmTBNode, 12, 12, 0); // Bind block_x + __hpvm__bindIn(SgemmTBNode, 13, 13, 0); // Bind block_y +} + +void SgemmWrapper(float *A, size_t bytesA, int lda, // 0-2 + float *B, size_t bytesB, int 
ldb, // 3-5 + float *C, size_t bytesC, int ldc, // 6-8 + int k, float alpha, float beta, // 9-11 + long block_x, long block_y, long grid_x, long grid_y // 12-15 +) { + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(3, A, B, C, 1, C); + void *SgemmRootNode = __hpvm__createNodeND(0, SgemmRoot); + + // Bind edges + __hpvm__bindIn(SgemmRootNode, 0, 0, 0); // Bind A + __hpvm__bindIn(SgemmRootNode, 1, 1, 0); // Bind bytesA + __hpvm__bindIn(SgemmRootNode, 2, 2, 0); // Bind lda + __hpvm__bindIn(SgemmRootNode, 3, 3, 0); // Bind B + __hpvm__bindIn(SgemmRootNode, 4, 4, 0); // Bind bytesB + __hpvm__bindIn(SgemmRootNode, 5, 5, 0); // Bind ldb + __hpvm__bindIn(SgemmRootNode, 6, 6, 0); // Bind C + __hpvm__bindIn(SgemmRootNode, 7, 7, 0); // Bind bytesC + __hpvm__bindIn(SgemmRootNode, 8, 8, 0); // Bind ldc + __hpvm__bindIn(SgemmRootNode, 9, 9, 0); // Bind k + __hpvm__bindIn(SgemmRootNode, 10, 10, 0); // Bind alpha + __hpvm__bindIn(SgemmRootNode, 11, 11, 0); // Bind beta + __hpvm__bindIn(SgemmRootNode, 12, 12, 0); // Bind block_x + __hpvm__bindIn(SgemmRootNode, 13, 13, 0); // Bind block_y + __hpvm__bindIn(SgemmRootNode, 14, 14, 0); // Bind grid_x + __hpvm__bindIn(SgemmRootNode, 15, 15, 0); // Bind grid_y +} + +// Creates root node for sgemm +__attribute__((noinline)) void basicSgemm(struct pb_TimerSet *timers, + char transa, char transb, int m, + int n, int k, float alpha, float *A, + size_t bytesA, int lda, float *B, + size_t bytesB, int ldb, float beta, + float *C, size_t bytesC, int ldc) { + if ((transa != 'N') && (transa != 'n')) { + std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl; + return; + } + + if ((transb != 'T') && (transb != 't')) { + std::cerr << "unsupported value of 'transb' in regtileSgemm()" << std::endl; + return; + } + + // In this code we assume the matrix sizes are multiple of tile size + if ((m % TILE_M) || (n % TILE_N)) { + std::cerr << "unsupported size of matrix. 
m should be multiple of " + << TILE_M << "; n should be multiple of " << TILE_N << std::endl; + return; + } + + // unsigned db[2] = {TILE_N,TILE_TB_HEIGHT}; + // unsigned dg[2] = {m*TILE_N/TILE_M,n*TILE_TB_HEIGHT/TILE_N}; + + long block_x = TILE_N; + long block_y = TILE_TB_HEIGHT; + long grid_x = m / TILE_M; + long grid_y = n / TILE_N; + + // Pack data in struct + RootIn *args = (RootIn *)malloc(sizeof(RootIn)); + packData(args, A, bytesA, lda, B, bytesB, ldb, C, bytesC, ldc, k, alpha, beta, + block_x, block_y, grid_x, grid_y); + + pb_SwitchToTimer(timers, hpvm_TimerID_COMPUTATION); + void *sgemmDFG = __hpvm__launch(0, SgemmWrapper, (void *)args); + + __hpvm__wait(sgemmDFG); + pb_SwitchToTimer(timers, pb_TimerID_COMPUTE); +} + +int main(int argc, char *argv[]) { + + struct pb_Parameters *params; + struct pb_TimerSet timers; + + size_t A_sz, B_sz, C_sz; + int matArow, matAcol; + int matBrow, matBcol; + std::vector<float> matA, matBT; + + /* Read command line. Expect 3 inputs: A, B and B^T + in column-major layout*/ + params = pb_ReadParameters(&argc, argv); + if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) || + (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) { + fprintf(stderr, "Expecting three input filenames\n"); + exit(-1); + } + + /* Read in data */ + // load A + readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA); + + // load B^T + readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT); + + pb_InitializeTimerSet(&timers); + __hpvm__init(); + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + // copy A to device memory + A_sz = matArow * matAcol * sizeof(float); + B_sz = matBrow * matBcol * sizeof(float); + + // allocate space for C + C_sz = matArow * matBcol * sizeof(float); + + // OpenCL memory allocation + std::vector<float> matC(matArow * matBcol); + + llvm_hpvm_track_mem(&matA.front(), A_sz); + llvm_hpvm_track_mem(&matBT.front(), B_sz); + llvm_hpvm_track_mem(&matC.front(), C_sz); + // Copy A 
and B^T into device memory + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + for (size_t i = 0; i < matC.size(); i++) + matC[i] = 0.0f; + + // Use standard sgemm interface + basicSgemm(&timers, 'N', 'T', matArow, matBcol, matAcol, 1.0f, &matA.front(), + A_sz, matArow, &matBT.front(), B_sz, matBcol, 0.0f, &matC.front(), + C_sz, matArow); + + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + llvm_hpvm_request_mem(&matC.front(), C_sz); + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + llvm_hpvm_untrack_mem(&matA.front()); + llvm_hpvm_untrack_mem(&matBT.front()); + llvm_hpvm_untrack_mem(&matC.front()); + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + + pb_PrintTimerSet(&timers); + __hpvm__cleanup(); + + if (params->outFile) { + /* Write C to file */ + // pb_SwitchToTimer(&timers, pb_TimerID_IO); + writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC); + } + + double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL])); + std::cout << "GFLOPs = " << 2. * matArow * matBcol * matAcol / GPUtime / 1e9 + << std::endl; + pb_FreeParameters(params); + + return 0; +} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..2234bf54e1e665f95b38dd0e25c2fe1b5539ce4e --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/Makefile @@ -0,0 +1,8 @@ +# (c) 2010 The Board of Trustees of the University of Illinois. 
+ +LANGUAGE=hpvm +SRCDIR_OBJS=io.ll #compute_gold.o +HPVM_OBJS=main.hpvm.ll +APP_CUDALDFLAGS=-lm -lstdc++ +APP_CFLAGS=-ffast-math -O3 +APP_CXXFLAGS=-ffast-math -O3 diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/io.cc new file mode 100644 index 0000000000000000000000000000000000000000..04744f404ebaf6e669c2bbe91600519742b57dc9 --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/io.cc @@ -0,0 +1,84 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* I/O routines for reading and writing matrices in column-major + * layout + */ + +#include <fstream> +#include <iostream> +#include <vector> + +char *readFile(const char *fileName) { + std::fstream f(fileName, std::fstream::in); + if (!f.good()) { + std::cerr << "Error Reading File!!" 
<< std::endl; + return NULL; + } + + f.seekg(0, std::ios::end); + int length = f.tellg(); + f.seekg(0, std::ios::beg); + + char *buffer; + + if (length > 0) { + buffer = new char[length]; + f.read(buffer, length); + buffer[length - 1] = 0; + } else { + buffer = new char; + buffer[0] = 0; + } + + f.close(); + + return buffer; +} + +bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, + std::vector<float> &v) { + std::cerr << "Opening file:" << fn << std::endl; + std::fstream f(fn, std::fstream::in); + if (!f.good()) { + return false; + } + + // Read # of rows and cols + f >> nr_row; + f >> nr_col; + + float data; + std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; + while (f.good()) { + f >> data; + v.push_back(data); + } + v.pop_back(); // remove the duplicated last element + return true; +} + +bool writeColMajorMatrixFile(const char *fn, int nr_row, int nr_col, + std::vector<float> &v) { + std::cerr << "Opening file:" << fn << " for write." << std::endl; + std::fstream f(fn, std::fstream::out); + if (!f.good()) { + return false; + } + + // Read # of rows and cols + f << nr_row << " " << nr_col << " "; + + float data; + std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; + for (int i = 0; i < v.size(); ++i) { + f << v[i] << ' '; + } + f << "\n"; + return true; +} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/main.cc new file mode 100644 index 0000000000000000000000000000000000000000..be39d713d55d1cb518083679fb1ea1ce717a4ca9 --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/main.cc @@ -0,0 +1,180 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* + * Main entry of dense matrix-matrix 
multiplication kernel + */ + +#include <hpvm.h> +#include <iostream> +#include <malloc.h> +#include <math.h> +#include <parboil.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <vector> + +// I/O routines +extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, + std::vector<float> &v); +extern bool writeColMajorMatrixFile(const char *fn, int, int, + std::vector<float> &); +extern char *readFile(const char *); + +// Parameters of tile sizes +#define TILE_SZ 16 + +#define CHECK_ERROR(errorMessage) \ + if (clStatus != CL_SUCCESS) { \ + std::cout << errorMessage << " Error!\n"; \ + std::cout << "Line: " << __LINE__ << "\n"; \ + exit(1); \ + } + +void mysgemmNT(float *A, int lda, float *B, int ldb, float *C, int ldc, int k, + float alpha, float beta) { + __hpvm__attributes(3, A, B, C, 1, C); + float c0, c1, c2, c3; + c0 = c1 = c2 = c3 = 0.0f; + int m = 4 * get_global_id(0); + int n = get_global_id(1); + + for (int i = 0; i < k; ++i) { + float a0 = A[m + i * lda]; + float a1 = A[m + 1 + i * lda]; + float a2 = A[m + 2 + i * lda]; + float a3 = A[m + 3 + i * lda]; + + float b = B[n + i * ldb]; + + c0 += a0 * b; + c1 += a1 * b; + c2 += a2 * b; + c3 += a3 * b; + } + C[m + n * ldc] = C[m + n * ldc] * beta + alpha * c0; + C[m + 1 + n * ldc] = C[m + 1 + n * ldc] * beta + alpha * c1; + C[m + 2 + n * ldc] = C[m + 2 + n * ldc] * beta + alpha * c2; + C[m + 3 + n * ldc] = C[m + 3 + n * ldc] * beta + alpha * c3; +} + +__attribute__((noinline)) void basicSgemm(char transa, char transb, int m, + int n, int k, float alpha, float *A, + size_t bytesA, int lda, float *B, + size_t bytesB, int ldb, float beta, + float *C, size_t bytesC, int ldc) { + if ((transa != 'N') && (transa != 'n')) { + std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl; + return; + } + + if ((transb != 'T') && (transb != 't')) { + std::cerr << "unsupported value of 'transb' in regtileSgemm()" << std::endl; + return; + } + + 
// In this code we assume the matrix sizes are multiple of tile size + if ((m % TILE_SZ) || (n % TILE_SZ)) { + std::cerr << "unsupported size of matrix. m should be multiple of " + << TILE_SZ << "; n should be multiple of " << TILE_SZ + << std::endl; + } + + unsigned db[2] = {TILE_SZ / 4, TILE_SZ}; + unsigned dg[2] = {m / TILE_SZ * db[0], n / TILE_SZ * db[1]}; + + unsigned sgemmDFG = __hpvm__node(mysgemmNT, 2, 2, db[0], db[1], dg[0] / db[0], + dg[1] / db[1], 12, A, bytesA, lda, B, bytesB, + ldb, C, bytesC, ldc, k, alpha, beta, 0); + __hpvm__wait(sgemmDFG); +} + +int main(int argc, char *argv[]) { + + struct pb_Parameters *params; + struct pb_TimerSet timers; + + size_t A_sz, B_sz, C_sz; + int matArow, matAcol; + int matBrow, matBcol; + std::vector<float> matA, matBT; + + pb_InitializeTimerSet(&timers); + __hpvm__init(); + + /* Read command line. Expect 3 inputs: A, B and B^T + in column-major layout*/ + params = pb_ReadParameters(&argc, argv); + if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) || + (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) { + fprintf(stderr, "Expecting three input filenames\n"); + exit(-1); + } + + /* Read in data */ + pb_SwitchToTimer(&timers, pb_TimerID_IO); + + // load A + readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA); + + // load B^T + readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT); + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + // copy A to device memory + A_sz = matArow * matAcol * sizeof(float); + B_sz = matBrow * matBcol * sizeof(float); + + // allocate space for C + C_sz = matArow * matBcol * sizeof(float); + + // OpenCL memory allocation + std::vector<float> matC(matArow * matBcol); + + llvm_hpvm_track_mem(&matA.front(), A_sz); + llvm_hpvm_track_mem(&matBT.front(), B_sz); + llvm_hpvm_track_mem(&matC.front(), C_sz); + // Copy A and B^T into device memory + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + for (size_t i = 0; i < matC.size(); i++) + 
matC[i] = 0.0f; + + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + + // Use standard sgemm interface + basicSgemm('N', 'T', matArow, matBcol, matAcol, 1.0f, &matA.front(), A_sz, + matArow, &matBT.front(), B_sz, matBcol, 0.0f, &matC.front(), C_sz, + matArow); + + if (params->outFile) { + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + + /* Write C to file */ + llvm_hpvm_request_mem(&matC.front(), C_sz); + pb_SwitchToTimer(&timers, pb_TimerID_IO); + writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC); + } + + pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK); + llvm_hpvm_untrack_mem(&matA.front()); + llvm_hpvm_untrack_mem(&matBT.front()); + llvm_hpvm_untrack_mem(&matC.front()); + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + + double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL])); + std::cout << "GFLOPs = " << 2. * matArow * matBcol * matAcol / GPUtime / 1e9 + << std::endl; + pb_PrintTimerSet(&timers); + __hpvm__cleanup(); + pb_FreeParameters(params); + + return 0; +} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..2234bf54e1e665f95b38dd0e25c2fe1b5539ce4e --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/Makefile @@ -0,0 +1,8 @@ +# (c) 2010 The Board of Trustees of the University of Illinois. 
+ +LANGUAGE=hpvm +SRCDIR_OBJS=io.ll #compute_gold.o +HPVM_OBJS=main.hpvm.ll +APP_CUDALDFLAGS=-lm -lstdc++ +APP_CFLAGS=-ffast-math -O3 +APP_CXXFLAGS=-ffast-math -O3 diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/io.cc new file mode 100644 index 0000000000000000000000000000000000000000..04744f404ebaf6e669c2bbe91600519742b57dc9 --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/io.cc @@ -0,0 +1,84 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* I/O routines for reading and writing matrices in column-major + * layout + */ + +#include <fstream> +#include <iostream> +#include <vector> + +char *readFile(const char *fileName) { + std::fstream f(fileName, std::fstream::in); + if (!f.good()) { + std::cerr << "Error Reading File!!" 
<< std::endl; + return NULL; + } + + f.seekg(0, std::ios::end); + int length = f.tellg(); + f.seekg(0, std::ios::beg); + + char *buffer; + + if (length > 0) { + buffer = new char[length]; + f.read(buffer, length); + buffer[length - 1] = 0; + } else { + buffer = new char; + buffer[0] = 0; + } + + f.close(); + + return buffer; +} + +bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, + std::vector<float> &v) { + std::cerr << "Opening file:" << fn << std::endl; + std::fstream f(fn, std::fstream::in); + if (!f.good()) { + return false; + } + + // Read # of rows and cols + f >> nr_row; + f >> nr_col; + + float data; + std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; + while (f.good()) { + f >> data; + v.push_back(data); + } + v.pop_back(); // remove the duplicated last element + return true; +} + +bool writeColMajorMatrixFile(const char *fn, int nr_row, int nr_col, + std::vector<float> &v) { + std::cerr << "Opening file:" << fn << " for write." << std::endl; + std::fstream f(fn, std::fstream::out); + if (!f.good()) { + return false; + } + + // Read # of rows and cols + f << nr_row << " " << nr_col << " "; + + float data; + std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; + for (int i = 0; i < v.size(); ++i) { + f << v[i] << ' '; + } + f << "\n"; + return true; +} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/main.cc new file mode 100644 index 0000000000000000000000000000000000000000..be39d713d55d1cb518083679fb1ea1ce717a4ca9 --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/main.cc @@ -0,0 +1,180 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* + * Main entry of dense 
matrix-matrix multiplication kernel + */ + +#include <hpvm.h> +#include <iostream> +#include <malloc.h> +#include <math.h> +#include <parboil.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <vector> + +// I/O routines +extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, + std::vector<float> &v); +extern bool writeColMajorMatrixFile(const char *fn, int, int, + std::vector<float> &); +extern char *readFile(const char *); + +// Parameters of tile sizes +#define TILE_SZ 16 + +#define CHECK_ERROR(errorMessage) \ + if (clStatus != CL_SUCCESS) { \ + std::cout << errorMessage << " Error!\n"; \ + std::cout << "Line: " << __LINE__ << "\n"; \ + exit(1); \ + } + +void mysgemmNT(float *A, int lda, float *B, int ldb, float *C, int ldc, int k, + float alpha, float beta) { + __hpvm__attributes(3, A, B, C, 1, C); + float c0, c1, c2, c3; + c0 = c1 = c2 = c3 = 0.0f; + int m = 4 * get_global_id(0); + int n = get_global_id(1); + + for (int i = 0; i < k; ++i) { + float a0 = A[m + i * lda]; + float a1 = A[m + 1 + i * lda]; + float a2 = A[m + 2 + i * lda]; + float a3 = A[m + 3 + i * lda]; + + float b = B[n + i * ldb]; + + c0 += a0 * b; + c1 += a1 * b; + c2 += a2 * b; + c3 += a3 * b; + } + C[m + n * ldc] = C[m + n * ldc] * beta + alpha * c0; + C[m + 1 + n * ldc] = C[m + 1 + n * ldc] * beta + alpha * c1; + C[m + 2 + n * ldc] = C[m + 2 + n * ldc] * beta + alpha * c2; + C[m + 3 + n * ldc] = C[m + 3 + n * ldc] * beta + alpha * c3; +} + +__attribute__((noinline)) void basicSgemm(char transa, char transb, int m, + int n, int k, float alpha, float *A, + size_t bytesA, int lda, float *B, + size_t bytesB, int ldb, float beta, + float *C, size_t bytesC, int ldc) { + if ((transa != 'N') && (transa != 'n')) { + std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl; + return; + } + + if ((transb != 'T') && (transb != 't')) { + std::cerr << "unsupported value of 'transb' in regtileSgemm()" << std::endl; + 
return; + } + + // In this code we assume the matrix sizes are multiple of tile size + if ((m % TILE_SZ) || (n % TILE_SZ)) { + std::cerr << "unsupported size of matrix. m should be multiple of " + << TILE_SZ << "; n should be multiple of " << TILE_SZ + << std::endl; + } + + unsigned db[2] = {TILE_SZ / 4, TILE_SZ}; + unsigned dg[2] = {m / TILE_SZ * db[0], n / TILE_SZ * db[1]}; + + unsigned sgemmDFG = __hpvm__node(mysgemmNT, 2, 2, db[0], db[1], dg[0] / db[0], + dg[1] / db[1], 12, A, bytesA, lda, B, bytesB, + ldb, C, bytesC, ldc, k, alpha, beta, 0); + __hpvm__wait(sgemmDFG); +} + +int main(int argc, char *argv[]) { + + struct pb_Parameters *params; + struct pb_TimerSet timers; + + size_t A_sz, B_sz, C_sz; + int matArow, matAcol; + int matBrow, matBcol; + std::vector<float> matA, matBT; + + pb_InitializeTimerSet(&timers); + __hpvm__init(); + + /* Read command line. Expect 3 inputs: A, B and B^T + in column-major layout*/ + params = pb_ReadParameters(&argc, argv); + if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) || + (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) { + fprintf(stderr, "Expecting three input filenames\n"); + exit(-1); + } + + /* Read in data */ + pb_SwitchToTimer(&timers, pb_TimerID_IO); + + // load A + readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA); + + // load B^T + readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT); + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + // copy A to device memory + A_sz = matArow * matAcol * sizeof(float); + B_sz = matBrow * matBcol * sizeof(float); + + // allocate space for C + C_sz = matArow * matBcol * sizeof(float); + + // OpenCL memory allocation + std::vector<float> matC(matArow * matBcol); + + llvm_hpvm_track_mem(&matA.front(), A_sz); + llvm_hpvm_track_mem(&matBT.front(), B_sz); + llvm_hpvm_track_mem(&matC.front(), C_sz); + // Copy A and B^T into device memory + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + for (size_t i = 0; i < 
matC.size(); i++) + matC[i] = 0.0f; + + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + + // Use standard sgemm interface + basicSgemm('N', 'T', matArow, matBcol, matAcol, 1.0f, &matA.front(), A_sz, + matArow, &matBT.front(), B_sz, matBcol, 0.0f, &matC.front(), C_sz, + matArow); + + if (params->outFile) { + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + + /* Write C to file */ + llvm_hpvm_request_mem(&matC.front(), C_sz); + pb_SwitchToTimer(&timers, pb_TimerID_IO); + writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC); + } + + pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK); + llvm_hpvm_untrack_mem(&matA.front()); + llvm_hpvm_untrack_mem(&matBT.front()); + llvm_hpvm_untrack_mem(&matC.front()); + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + + double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL])); + std::cout << "GFLOPs = " << 2. * matArow * matBcol * matAcol / GPUtime / 1e9 + << std::endl; + pb_PrintTimerSet(&timers); + __hpvm__cleanup(); + pb_FreeParameters(params); + + return 0; +} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..2234bf54e1e665f95b38dd0e25c2fe1b5539ce4e --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/Makefile @@ -0,0 +1,8 @@ +# (c) 2010 The Board of Trustees of the University of Illinois. 
+ +LANGUAGE=hpvm +SRCDIR_OBJS=io.ll #compute_gold.o +HPVM_OBJS=main.hpvm.ll +APP_CUDALDFLAGS=-lm -lstdc++ +APP_CFLAGS=-ffast-math -O3 +APP_CXXFLAGS=-ffast-math -O3 diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/io.cc new file mode 100644 index 0000000000000000000000000000000000000000..04744f404ebaf6e669c2bbe91600519742b57dc9 --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/io.cc @@ -0,0 +1,84 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* I/O routines for reading and writing matrices in column-major + * layout + */ + +#include <fstream> +#include <iostream> +#include <vector> + +char *readFile(const char *fileName) { + std::fstream f(fileName, std::fstream::in); + if (!f.good()) { + std::cerr << "Error Reading File!!" 
<< std::endl; + return NULL; + } + + f.seekg(0, std::ios::end); + int length = f.tellg(); + f.seekg(0, std::ios::beg); + + char *buffer; + + if (length > 0) { + buffer = new char[length]; + f.read(buffer, length); + buffer[length - 1] = 0; + } else { + buffer = new char; + buffer[0] = 0; + } + + f.close(); + + return buffer; +} + +bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, + std::vector<float> &v) { + std::cerr << "Opening file:" << fn << std::endl; + std::fstream f(fn, std::fstream::in); + if (!f.good()) { + return false; + } + + // Read # of rows and cols + f >> nr_row; + f >> nr_col; + + float data; + std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; + while (f.good()) { + f >> data; + v.push_back(data); + } + v.pop_back(); // remove the duplicated last element + return true; +} + +bool writeColMajorMatrixFile(const char *fn, int nr_row, int nr_col, + std::vector<float> &v) { + std::cerr << "Opening file:" << fn << " for write." << std::endl; + std::fstream f(fn, std::fstream::out); + if (!f.good()) { + return false; + } + + // Read # of rows and cols + f << nr_row << " " << nr_col << " "; + + float data; + std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; + for (int i = 0; i < v.size(); ++i) { + f << v[i] << ' '; + } + f << "\n"; + return true; +} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/kernel.cl b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/kernel.cl new file mode 100644 index 0000000000000000000000000000000000000000..7530a400759e2d6db6ffd466c3f6aaf9dfab2117 --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/kernel.cl @@ -0,0 +1,53 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* + * Kernel of dense 
matrix-matrix multiplication kernel. + */ + +__kernel void mysgemmNT( __global float *A, size_t bytesA, int lda, __global float *B, size_t bytesB, int ldb, __global float* C, size_t bytesC, int ldc, int k, float alpha, float beta ) +{ +/* + // Partial results + float c[8]; + for (int i=0; i < 8; i++) + c[i] = 0.0f; + float a[8]; + float b[8]; + + int m = get_global_id(0) * 8; + int n = get_global_id(1); + + for (int i = 0; i < k; ++i) { + for (int id = 0; id < 8; id++) { + a[id] = A[m + id + i * lda]; + b[id] = B[n + i * ldb]; + c[id] += a[id] * b[id]; + } + } + + for (int id = 0; id < 8; id++) + C[m+id+n*ldc] = C[m+id+n*ldc] * beta + alpha * c[id]; +*/ + + // Partial results + float8 cp = (float8)(0.0f); + + int m = get_global_id(0) * 8; + int n = get_global_id(1); + + for (int i = 0; i < k; ++i) { + float8 a = vload8(0, A + (m + i * lda)); + float8 b = (float8)(B[n + i * ldb]); + cp += a * b; + } + + float8 c = vload8(0, C + (m+n*ldc)); + c = c * beta + alpha * cp; + vstore8(c, 0, C + (m+n*ldc)); +} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/main.cc new file mode 100644 index 0000000000000000000000000000000000000000..286297d6fefe0b6f72bdc9e8a9079a131a7b16bf --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/main.cc @@ -0,0 +1,189 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* + * Main entry of dense matrix-matrix multiplication kernel + */ + +#include <hpvm.h> +#include <iostream> +#include <malloc.h> +#include <math.h> +#include <parboil.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <vector> + +// I/O routines +extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int 
&nr_col, + std::vector<float> &v); +extern bool writeColMajorMatrixFile(const char *fn, int, int, + std::vector<float> &); +extern char *readFile(const char *); + +// Parameters of tile sizes +#define TILE_SZ 16 +#define VEC_SZ 8 + +#define CHECK_ERROR(errorMessage) \ + if (clStatus != CL_SUCCESS) { \ + std::cout << errorMessage << " Error!\n"; \ + std::cout << "Line: " << __LINE__ << "\n"; \ + exit(1); \ + } + +void mysgemmNT(float *A, int lda, float *B, int ldb, float *C, int ldc, int k, + float alpha, float beta) { + __hpvm__hint(hpvm::GPU_TARGET); + __hpvm__attributes(3, A, B, C, 1, C); + + float c = 0.0f; + int m = get_global_id(0); + int n = get_global_id(1); + + for (int i = 0; i < k; ++i) { + float a = A[m + i * lda]; + float b = B[n + i * ldb]; + c += a * b; + } + C[m + n * ldc] = C[m + n * ldc] * beta + alpha * c; + /* + Will be substituted by this kernel at the llvm level + // Partial results + float8 cp = (float8)(0.0f); + + int m = get_global_id(0) * 8; + int n = get_global_id(1); + + for (int i = 0; i < k; ++i) { + float8 a = vload8(0, A + (m + i * lda)); + float8 b = (float8)(B[n + i * ldb]); + cp += a * b; + } + + float8 c = vload8(0, C + (m+n*ldc)); + c = c * beta + alpha * cp; + vstore8(c, 0, C + (m+n*ldc)); + */ +} + +__attribute__((noinline)) void basicSgemm(char transa, char transb, int m, + int n, int k, float alpha, float *A, + size_t bytesA, int lda, float *B, + size_t bytesB, int ldb, float beta, + float *C, size_t bytesC, int ldc) { + if ((transa != 'N') && (transa != 'n')) { + std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl; + return; + } + + if ((transb != 'T') && (transb != 't')) { + std::cerr << "unsupported value of 'transb' in regtileSgemm()" << std::endl; + return; + } + + // In this code we assume the matrix sizes are multiple of tile size + if ((m % TILE_SZ) || (n % TILE_SZ)) { + std::cerr << "unsupported size of matrix. 
m should be multiple of " + << TILE_SZ << "; n should be multiple of " << TILE_SZ + << std::endl; + } + + unsigned db[2] = {TILE_SZ / VEC_SZ, TILE_SZ}; + unsigned dg[2] = {m / TILE_SZ * db[0], n / TILE_SZ * db[1]}; + + unsigned sgemmDFG = __hpvm__node(mysgemmNT, 2, 2, db[0], db[1], dg[0] / db[0], + dg[1] / db[1], 12, A, bytesA, lda, B, bytesB, + ldb, C, bytesC, ldc, k, alpha, beta, 0); + __hpvm__wait(sgemmDFG); +} + +int main(int argc, char *argv[]) { + + struct pb_Parameters *params; + struct pb_TimerSet timers; + + size_t A_sz, B_sz, C_sz; + int matArow, matAcol; + int matBrow, matBcol; + std::vector<float> matA, matBT; + + /* Read command line. Expect 3 inputs: A, B and B^T + in column-major layout*/ + params = pb_ReadParameters(&argc, argv); + if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) || + (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) { + fprintf(stderr, "Expecting three input filenames\n"); + exit(-1); + } + + /* Read in data */ + // load A + readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA); + + // load B^T + readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT); + + pb_InitializeTimerSet(&timers); + __hpvm__init(); + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + // copy A to device memory + A_sz = matArow * matAcol * sizeof(float); + B_sz = matBrow * matBcol * sizeof(float); + + // allocate space for C + C_sz = matArow * matBcol * sizeof(float); + + // OpenCL memory allocation + std::vector<float> matC(matArow * matBcol); + + llvm_hpvm_track_mem(&matA.front(), A_sz); + llvm_hpvm_track_mem(&matBT.front(), B_sz); + llvm_hpvm_track_mem(&matC.front(), C_sz); + // Copy A and B^T into device memory + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + for (size_t i = 0; i < matC.size(); i++) + matC[i] = 0.0f; + + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + + // Use standard sgemm interface + basicSgemm('N', 'T', matArow, matBcol, matAcol, 1.0f, &matA.front(), A_sz, + matArow, 
&matBT.front(), B_sz, matBcol, 0.0f, &matC.front(), C_sz, + matArow); + + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + llvm_hpvm_request_mem(&matC.front(), C_sz); + + pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK); + llvm_hpvm_untrack_mem(&matA.front()); + llvm_hpvm_untrack_mem(&matBT.front()); + llvm_hpvm_untrack_mem(&matC.front()); + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + + pb_PrintTimerSet(&timers); + __hpvm__cleanup(); + + if (params->outFile) { + + /* Write C to file */ + // pb_SwitchToTimer(&timers, pb_TimerID_IO); + writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC); + } + + double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL])); + std::cout << "GFLOPs = " << 2. * matArow * matBcol * matAcol / GPUtime / 1e9 + << std::endl; + pb_FreeParameters(params); + + return 0; +} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..2234bf54e1e665f95b38dd0e25c2fe1b5539ce4e --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/Makefile @@ -0,0 +1,8 @@ +# (c) 2010 The Board of Trustees of the University of Illinois. 
+ +LANGUAGE=hpvm +SRCDIR_OBJS=io.ll #compute_gold.o +HPVM_OBJS=main.hpvm.ll +APP_CUDALDFLAGS=-lm -lstdc++ +APP_CFLAGS=-ffast-math -O3 +APP_CXXFLAGS=-ffast-math -O3 diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/io.cc new file mode 100644 index 0000000000000000000000000000000000000000..04744f404ebaf6e669c2bbe91600519742b57dc9 --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/io.cc @@ -0,0 +1,84 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* I/O routines for reading and writing matrices in column-major + * layout + */ + +#include <fstream> +#include <iostream> +#include <vector> + +char *readFile(const char *fileName) { + std::fstream f(fileName, std::fstream::in); + if (!f.good()) { + std::cerr << "Error Reading File!!" 
<< std::endl; + return NULL; + } + + f.seekg(0, std::ios::end); + int length = f.tellg(); + f.seekg(0, std::ios::beg); + + char *buffer; + + if (length > 0) { + buffer = new char[length]; + f.read(buffer, length); + buffer[length - 1] = 0; + } else { + buffer = new char; + buffer[0] = 0; + } + + f.close(); + + return buffer; +} + +bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, + std::vector<float> &v) { + std::cerr << "Opening file:" << fn << std::endl; + std::fstream f(fn, std::fstream::in); + if (!f.good()) { + return false; + } + + // Read # of rows and cols + f >> nr_row; + f >> nr_col; + + float data; + std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; + while (f.good()) { + f >> data; + v.push_back(data); + } + v.pop_back(); // remove the duplicated last element + return true; +} + +bool writeColMajorMatrixFile(const char *fn, int nr_row, int nr_col, + std::vector<float> &v) { + std::cerr << "Opening file:" << fn << " for write." << std::endl; + std::fstream f(fn, std::fstream::out); + if (!f.good()) { + return false; + } + + // Read # of rows and cols + f << nr_row << " " << nr_col << " "; + + float data; + std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; + for (int i = 0; i < v.size(); ++i) { + f << v[i] << ' '; + } + f << "\n"; + return true; +} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/kernel.cl b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/kernel.cl new file mode 100644 index 0000000000000000000000000000000000000000..cc6e708148f40c80186004d3febd66988c67ae37 --- /dev/null +++ b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/kernel.cl @@ -0,0 +1,86 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* + * Kernel of 
dense matrix-matrix multiplication kernel. + */ + +// Parameters of tile sizes +#define TILE_N 8 +#define TILE_TB_HEIGHT 8 +#define TILE_M (TILE_N*TILE_TB_HEIGHT) + +__kernel void mysgemmNT( __global const float *A, int lda, __global const float *B, int ldb, __global float* C, int ldc, int k, float alpha, float beta ) +{ + + float c[TILE_N]; + for (int i=0; i < TILE_N; i++) + c[i] = 0.0f; + + int mid = get_local_id(1)*get_local_size(0)+get_local_id(0); + int m = get_group_id(0) * TILE_M + mid; + + int b_base = 0; + + for (int i = 0; i < k; i+=TILE_TB_HEIGHT) { + float a; + b_base = get_group_id(1) * TILE_N + i * ldb; + + for (int j = 0; j < TILE_TB_HEIGHT; j++) { + a = A[m + (i+j)*lda]; + for (int kk = 0; kk < TILE_N; kk++) + c[kk] += a * B[b_base + j * ldb + kk]; + + } + } + int t = ldc * get_group_id(1) * TILE_N + m; + for (int i = 0; i < TILE_N; i++) { + C[t+i*ldc] = C[t+i*ldc] * beta + alpha * c[i]; + } +/* + Will be substituted by this kernel at the llvm level + + // Partial results + floatn cp = (floatn)(0.0f); + + int mid = get_local_id(1)*get_local_size(0)+get_local_id(0); + int m = get_group_id(0) * TILE_M + mid; + + int b_base = 0; + + for (int i = 0; i < k; i+=TILE_TB_HEIGHT) { + float a; + b_base = get_group_id(1) * TILE_N + i * ldb; + + for (int j = 0; j < TILE_TB_HEIGHT; j++) { + a = A[m + (i+j)*lda]; + cp += a * vloadn(0, B + b_base + j * ldb); + } + } + + cp = alpha * cp; + float c[TILE_N]; + c[0] = cp.s0; + c[1] = cp.s1; + c[2] = cp.s2; + c[3] = cp.s3; + c[4] = cp.s4; + c[5] = cp.s5; + c[6] = cp.s6; + c[7] = cp.s7; + + int t = ldc * get_group_id(1) * TILE_N + m; + for (int i = 0; i < TILE_N; i++) { + C[t+i*ldc] = C[t+i*ldc] * beta + c[i]; + } + +*/ + +*/ + +} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/main.cc new file mode 100644 index 0000000000000000000000000000000000000000..8fbc45e08a9e2fd1e3af6cc03360086b354665d7 --- /dev/null +++ 
b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/main.cc @@ -0,0 +1,227 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* + * Main entry of dense matrix-matrix multiplication kernel + */ + +#include <hpvm.h> +#include <iostream> +#include <malloc.h> +#include <math.h> +#include <parboil.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <vector> + +// I/O routines +extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, + std::vector<float> &v); +extern bool writeColMajorMatrixFile(const char *fn, int, int, + std::vector<float> &); +extern char *readFile(const char *); + +// Parameters of tile sizes +#define TILE_N 8 +#define TILE_TB_HEIGHT 8 +#define TILE_M (TILE_N * TILE_TB_HEIGHT) + +#define CHECK_ERROR(errorMessage) \ + if (clStatus != CL_SUCCESS) { \ + std::cout << errorMessage << " Error!\n"; \ + std::cout << "Line: " << __LINE__ << "\n"; \ + exit(1); \ + } + +void mysgemmNT(float *A, int lda, float *B, int ldb, float *C, int ldc, int k, + float alpha, float beta) { + __hpvm__hint(hpvm::SPIR_TARGET); + __hpvm__attributes(3, A, B, C, 1, C); + + float c[TILE_N]; + for (int i = 0; i < TILE_N; i++) + c[i] = 0.0f; + + int mid = get_local_id(1) * get_local_size(0) + get_local_id(0); + int m = get_group_id(0) * TILE_M + mid; + + int b_base = 0; + + for (int i = 0; i < k; i += TILE_TB_HEIGHT) { + float a; + b_base = get_group_id(1) * TILE_N + i * ldb; + + for (int j = 0; j < TILE_TB_HEIGHT; j++) { + a = A[m + (i + j) * lda]; + for (int kk = 0; kk < TILE_N; kk++) + c[kk] += a * B[b_base + j * ldb + kk]; + } + } + int t = ldc * get_group_id(1) * TILE_N + m; + for (int i = 0; i < TILE_N; i++) { + C[t + i * ldc] = C[t + i * ldc] * beta + alpha * c[i]; + } + /* + Will 
be substituted by this kernel at the llvm level + + // Partial results + floatn cp = (floatn)(0.0f); + + int mid = get_local_id(1)*get_local_size(0)+get_local_id(0); + int m = get_group_id(0) * TILE_M + mid; + + int b_base = 0; + + for (int i = 0; i < k; i+=TILE_TB_HEIGHT) { + float a; + b_base = get_group_id(1) * TILE_N + i * ldb; + + for (int j = 0; j < TILE_TB_HEIGHT; j++) { + a = A[m + (i+j)*lda]; + cp += a * vloadn(0, B + b_base + j * ldb); + } + } + + cp = alpha * cp; + float c[TILE_N]; + c[0] = cp.s0; + c[1] = cp.s1; + c[2] = cp.s2; + c[3] = cp.s3; + c[4] = cp.s4; + c[5] = cp.s5; + c[6] = cp.s6; + c[7] = cp.s7; + + int t = ldc * get_group_id(1) * TILE_N + m; + for (int i = 0; i < TILE_N; i++) { + C[t+i*ldc] = C[t+i*ldc] * beta + c[i]; + } + + */ +} + +__attribute__((noinline)) void basicSgemm(char transa, char transb, int m, + int n, int k, float alpha, float *A, + size_t bytesA, int lda, float *B, + size_t bytesB, int ldb, float beta, + float *C, size_t bytesC, int ldc) { + if ((transa != 'N') && (transa != 'n')) { + std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl; + return; + } + + if ((transb != 'T') && (transb != 't')) { + std::cerr << "unsupported value of 'transb' in regtileSgemm()" << std::endl; + return; + } + + // In this code we assume the matrix sizes are multiple of tile size + if ((m % TILE_M) || (n % TILE_N)) { + std::cerr << "unsupported size of matrix. 
m should be multiple of " + << TILE_M << "; n should be multiple of " << TILE_N << std::endl; + return; + } + + // unsigned db[2] = {TILE_SZ/VEC_SZ,TILE_SZ}; + // unsigned dg[2] = {m/TILE_SZ*db[0],n/TILE_SZ*db[1]}; + unsigned db[2] = {TILE_N, TILE_TB_HEIGHT}; + unsigned dg[2] = {m * TILE_N / TILE_M, n * TILE_TB_HEIGHT / TILE_N}; + + void *sgemmDFG = __hpvm__node(mysgemmNT, 2, 2, db[0], db[1], dg[0] / db[0], + dg[1] / db[1], 12, A, bytesA, lda, B, bytesB, + ldb, C, bytesC, ldc, k, alpha, beta, 0); + __hpvm__wait(sgemmDFG); +} + +int main(int argc, char *argv[]) { + + struct pb_Parameters *params; + struct pb_TimerSet timers; + + size_t A_sz, B_sz, C_sz; + int matArow, matAcol; + int matBrow, matBcol; + std::vector<float> matA, matBT; + + /* Read command line. Expect 3 inputs: A, B and B^T + in column-major layout*/ + params = pb_ReadParameters(&argc, argv); + if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) || + (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) { + fprintf(stderr, "Expecting three input filenames\n"); + exit(-1); + } + + /* Read in data */ + // load A + readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA); + + // load B^T + readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT); + + pb_InitializeTimerSet(&timers); + __hpvm__init(); + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + // copy A to device memory + A_sz = matArow * matAcol * sizeof(float); + B_sz = matBrow * matBcol * sizeof(float); + + // allocate space for C + C_sz = matArow * matBcol * sizeof(float); + + // OpenCL memory allocation + std::vector<float> matC(matArow * matBcol); + + pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_TRACK); + llvm_hpvm_track_mem(&matA.front(), A_sz); + llvm_hpvm_track_mem(&matBT.front(), B_sz); + llvm_hpvm_track_mem(&matC.front(), C_sz); + + // Copy A and B^T into device memory + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + for (size_t i = 0; i < matC.size(); i++) + matC[i] = 0.0f; + + 
pb_SwitchToTimer(&timers, pb_TimerID_NONE); + + // Use standard sgemm interface + basicSgemm('N', 'T', matArow, matBcol, matAcol, 1.0f, &matA.front(), A_sz, + matArow, &matBT.front(), B_sz, matBcol, 0.0f, &matC.front(), C_sz, + matArow); + + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + llvm_hpvm_request_mem(&matC.front(), C_sz); + + pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK); + llvm_hpvm_untrack_mem(&matA.front()); + llvm_hpvm_untrack_mem(&matBT.front()); + llvm_hpvm_untrack_mem(&matC.front()); + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + + pb_PrintTimerSet(&timers); + __hpvm__cleanup(); + + if (params->outFile) { + + /* Write C to file */ + // pb_SwitchToTimer(&timers, pb_TimerID_IO); + writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC); + } + + double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL])); + std::cout << "GFLOPs = " << 2. * matArow * matBcol * matAcol / GPUtime / 1e9 + << std::endl; + pb_FreeParameters(params); + + return 0; +} diff --git a/hpvm/test/parboil/benchmarks/spmv/Makefile b/hpvm/test/parboil/benchmarks/spmv/Makefile index 0a85b9253f12d6a084df7347677353be04b4d367..b0582e60a05d1a81b2facaf169f6dbd2d70ad8dd 100644 --- a/hpvm/test/parboil/benchmarks/spmv/Makefile +++ b/hpvm/test/parboil/benchmarks/spmv/Makefile @@ -1,9 +1,9 @@ PARBOIL_ROOT = $(LLVM_SRC_ROOT)/tools/hpvm/test/parboil APP = spmv -# Default compile visc +# Default compile hpvm ifeq ($(VERSION),) - VERSION = visc + VERSION = hpvm endif # Default use small test case diff --git a/hpvm/test/parboil/benchmarks/spmv/src/visc/Makefile b/hpvm/test/parboil/benchmarks/spmv/src/hpvm/Makefile similarity index 88% rename from hpvm/test/parboil/benchmarks/spmv/src/visc/Makefile rename to hpvm/test/parboil/benchmarks/spmv/src/hpvm/Makefile index a289d68f342ba488f8ce4d90faf26816d4d00829..06af6bebea2aa6a94f56196e0399a25ebfdda030 100644 --- a/hpvm/test/parboil/benchmarks/spmv/src/visc/Makefile +++ b/hpvm/test/parboil/benchmarks/spmv/src/hpvm/Makefile @@ -1,9 
+1,9 @@ # (c) 2010 The Board of Trustees of the University of Illinois. -LANGUAGE=visc +LANGUAGE=hpvm TOOLS_SRC=common_src/convert-dataset SRCDIR_OBJS=gpu_info.ll file.ll -VISC_OBJS=main.visc.ll +HPVM_OBJS=main.hpvm.ll APP_CUDALDFLAGS=-lm APP_CFLAGS=-ffast-math -O1 -I$(TOOLS_SRC) APP_CXXFLAGS=-ffast-math -O1 -I$(TOOLS_SRC) diff --git a/hpvm/test/parboil/benchmarks/spmv/src/visc/file.cpp b/hpvm/test/parboil/benchmarks/spmv/src/hpvm/file.cpp similarity index 100% rename from hpvm/test/parboil/benchmarks/spmv/src/visc/file.cpp rename to hpvm/test/parboil/benchmarks/spmv/src/hpvm/file.cpp diff --git a/hpvm/test/parboil/benchmarks/spmv/src/visc/file.h b/hpvm/test/parboil/benchmarks/spmv/src/hpvm/file.h similarity index 100% rename from hpvm/test/parboil/benchmarks/spmv/src/visc/file.h rename to hpvm/test/parboil/benchmarks/spmv/src/hpvm/file.h diff --git a/hpvm/test/parboil/benchmarks/spmv/src/visc/gpu_info.cpp b/hpvm/test/parboil/benchmarks/spmv/src/hpvm/gpu_info.cpp similarity index 100% rename from hpvm/test/parboil/benchmarks/spmv/src/visc/gpu_info.cpp rename to hpvm/test/parboil/benchmarks/spmv/src/hpvm/gpu_info.cpp diff --git a/hpvm/test/parboil/benchmarks/spmv/src/visc/gpu_info.h b/hpvm/test/parboil/benchmarks/spmv/src/hpvm/gpu_info.h similarity index 100% rename from hpvm/test/parboil/benchmarks/spmv/src/visc/gpu_info.h rename to hpvm/test/parboil/benchmarks/spmv/src/hpvm/gpu_info.h diff --git a/hpvm/test/parboil/benchmarks/spmv/src/visc/kernel.cl b/hpvm/test/parboil/benchmarks/spmv/src/hpvm/kernel.cl similarity index 100% rename from hpvm/test/parboil/benchmarks/spmv/src/visc/kernel.cl rename to hpvm/test/parboil/benchmarks/spmv/src/hpvm/kernel.cl diff --git a/hpvm/test/parboil/benchmarks/spmv/src/visc/main.cpp b/hpvm/test/parboil/benchmarks/spmv/src/hpvm/main.cpp similarity index 68% rename from hpvm/test/parboil/benchmarks/spmv/src/visc/main.cpp rename to hpvm/test/parboil/benchmarks/spmv/src/hpvm/main.cpp index 
4f72d2000afd70a986c8a1c82aa06866e2606511..4414744b4995a9ae09bb88fdda297150dfbe1031 100644 --- a/hpvm/test/parboil/benchmarks/spmv/src/visc/main.cpp +++ b/hpvm/test/parboil/benchmarks/spmv/src/hpvm/main.cpp @@ -8,15 +8,15 @@ //#include <CL/cl.h> //#include <CL/cl_ext.h> +#include <hpvm.h> #include <parboil.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <visc.h> +#include "convert_dataset.h" #include "file.h" #include "gpu_info.h" -#include "convert_dataset.h" #define WARP_BITS 5 @@ -54,15 +54,15 @@ void spmv_jds(float *dst_vector, size_t bytes_dst_vector, float *d_data, size_t bytes_x_vec, int dim, int *jds_ptr_int, size_t bytes_jds_ptr_int, int *sh_zcnt_int, size_t bytes_sh_zcnt_int) { - __visc__hint(visc::DEVICE); - __visc__attributes(7, dst_vector, d_data, d_index, d_perm, x_vec, jds_ptr_int, + __hpvm__hint(hpvm::DEVICE); + __hpvm__attributes(7, dst_vector, d_data, d_index, d_perm, x_vec, jds_ptr_int, sh_zcnt_int, 1, dst_vector); - void *thisNode = __visc__getNode(); - void *parentNode = __visc__getParentNode(thisNode); - int lx = __visc__getNodeInstanceID_x(thisNode); - int gx = __visc__getNodeInstanceID_x(parentNode); - int gridx = __visc__getNumNodeInstances_x(thisNode); + void *thisNode = __hpvm__getNode(); + void *parentNode = __hpvm__getParentNode(thisNode); + int lx = __hpvm__getNodeInstanceID_x(thisNode); + int gx = __hpvm__getNodeInstanceID_x(parentNode); + int gridx = __hpvm__getNumNodeInstances_x(thisNode); int ix = gx * gridx + lx; int warp_id = ix >> WARP_BITS; @@ -126,25 +126,25 @@ void spmvLvl1(float *dst_vector, size_t bytes_dst_vector, float *d_data, size_t bytes_x_vec, int dim, int *jds_ptr_int, size_t bytes_jds_ptr_int, int *sh_zcnt_int, size_t bytes_sh_zcnt_int, size_t dim_X1) { - __visc__hint(visc::DEVICE); - __visc__attributes(7, dst_vector, d_data, d_index, d_perm, x_vec, jds_ptr_int, + __hpvm__hint(hpvm::DEVICE); + __hpvm__attributes(7, dst_vector, d_data, d_index, d_perm, x_vec, jds_ptr_int, sh_zcnt_int, 1, 
dst_vector); - void *spmv_node = __visc__createNodeND(1, spmv_jds, dim_X1); - __visc__bindIn(spmv_node, 0, 0, 0); - __visc__bindIn(spmv_node, 1, 1, 0); - __visc__bindIn(spmv_node, 2, 2, 0); - __visc__bindIn(spmv_node, 3, 3, 0); - __visc__bindIn(spmv_node, 4, 4, 0); - __visc__bindIn(spmv_node, 5, 5, 0); - __visc__bindIn(spmv_node, 6, 6, 0); - __visc__bindIn(spmv_node, 7, 7, 0); - __visc__bindIn(spmv_node, 8, 8, 0); - __visc__bindIn(spmv_node, 9, 9, 0); - __visc__bindIn(spmv_node, 10, 10, 0); - __visc__bindIn(spmv_node, 11, 11, 0); - __visc__bindIn(spmv_node, 12, 12, 0); - __visc__bindIn(spmv_node, 13, 13, 0); - __visc__bindIn(spmv_node, 14, 14, 0); + void *spmv_node = __hpvm__createNodeND(1, spmv_jds, dim_X1); + __hpvm__bindIn(spmv_node, 0, 0, 0); + __hpvm__bindIn(spmv_node, 1, 1, 0); + __hpvm__bindIn(spmv_node, 2, 2, 0); + __hpvm__bindIn(spmv_node, 3, 3, 0); + __hpvm__bindIn(spmv_node, 4, 4, 0); + __hpvm__bindIn(spmv_node, 5, 5, 0); + __hpvm__bindIn(spmv_node, 6, 6, 0); + __hpvm__bindIn(spmv_node, 7, 7, 0); + __hpvm__bindIn(spmv_node, 8, 8, 0); + __hpvm__bindIn(spmv_node, 9, 9, 0); + __hpvm__bindIn(spmv_node, 10, 10, 0); + __hpvm__bindIn(spmv_node, 11, 11, 0); + __hpvm__bindIn(spmv_node, 12, 12, 0); + __hpvm__bindIn(spmv_node, 13, 13, 0); + __hpvm__bindIn(spmv_node, 14, 14, 0); } void spmvLvl2(float *dst_vector, size_t bytes_dst_vector, float *d_data, @@ -153,26 +153,26 @@ void spmvLvl2(float *dst_vector, size_t bytes_dst_vector, float *d_data, size_t bytes_x_vec, int dim, int *jds_ptr_int, size_t bytes_jds_ptr_int, int *sh_zcnt_int, size_t bytes_sh_zcnt_int, size_t dim_X1, size_t dim_X2) { - __visc__hint(visc::CPU_TARGET); - __visc__attributes(7, dst_vector, d_data, d_index, d_perm, x_vec, jds_ptr_int, + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(7, dst_vector, d_data, d_index, d_perm, x_vec, jds_ptr_int, sh_zcnt_int, 1, dst_vector); - void *spmv_node = __visc__createNodeND(1, spmvLvl1, dim_X2); - __visc__bindIn(spmv_node, 0, 0, 0); - 
__visc__bindIn(spmv_node, 1, 1, 0); - __visc__bindIn(spmv_node, 2, 2, 0); - __visc__bindIn(spmv_node, 3, 3, 0); - __visc__bindIn(spmv_node, 4, 4, 0); - __visc__bindIn(spmv_node, 5, 5, 0); - __visc__bindIn(spmv_node, 6, 6, 0); - __visc__bindIn(spmv_node, 7, 7, 0); - __visc__bindIn(spmv_node, 8, 8, 0); - __visc__bindIn(spmv_node, 9, 9, 0); - __visc__bindIn(spmv_node, 10, 10, 0); - __visc__bindIn(spmv_node, 11, 11, 0); - __visc__bindIn(spmv_node, 12, 12, 0); - __visc__bindIn(spmv_node, 13, 13, 0); - __visc__bindIn(spmv_node, 14, 14, 0); - __visc__bindIn(spmv_node, 15, 15, 0); + void *spmv_node = __hpvm__createNodeND(1, spmvLvl1, dim_X2); + __hpvm__bindIn(spmv_node, 0, 0, 0); + __hpvm__bindIn(spmv_node, 1, 1, 0); + __hpvm__bindIn(spmv_node, 2, 2, 0); + __hpvm__bindIn(spmv_node, 3, 3, 0); + __hpvm__bindIn(spmv_node, 4, 4, 0); + __hpvm__bindIn(spmv_node, 5, 5, 0); + __hpvm__bindIn(spmv_node, 6, 6, 0); + __hpvm__bindIn(spmv_node, 7, 7, 0); + __hpvm__bindIn(spmv_node, 8, 8, 0); + __hpvm__bindIn(spmv_node, 9, 9, 0); + __hpvm__bindIn(spmv_node, 10, 10, 0); + __hpvm__bindIn(spmv_node, 11, 11, 0); + __hpvm__bindIn(spmv_node, 12, 12, 0); + __hpvm__bindIn(spmv_node, 13, 13, 0); + __hpvm__bindIn(spmv_node, 14, 14, 0); + __hpvm__bindIn(spmv_node, 15, 15, 0); } void spmvLvl3(float *dst_vector, size_t bytes_dst_vector, float *d_data, @@ -181,27 +181,27 @@ void spmvLvl3(float *dst_vector, size_t bytes_dst_vector, float *d_data, size_t bytes_x_vec, int dim, int *jds_ptr_int, size_t bytes_jds_ptr_int, int *sh_zcnt_int, size_t bytes_sh_zcnt_int, size_t dim_X1, size_t dim_X2) { - __visc__hint(visc::CPU_TARGET); - __visc__attributes(7, dst_vector, d_data, d_index, d_perm, x_vec, jds_ptr_int, + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(7, dst_vector, d_data, d_index, d_perm, x_vec, jds_ptr_int, sh_zcnt_int, 1, dst_vector); - void *spmv_node = __visc__createNodeND(1, spmvLvl2, dim_X2); - __visc__bindIn(spmv_node, 0, 0, 0); - __visc__bindIn(spmv_node, 1, 1, 0); - 
__visc__bindIn(spmv_node, 2, 2, 0); - __visc__bindIn(spmv_node, 3, 3, 0); - __visc__bindIn(spmv_node, 4, 4, 0); - __visc__bindIn(spmv_node, 5, 5, 0); - __visc__bindIn(spmv_node, 6, 6, 0); - __visc__bindIn(spmv_node, 7, 7, 0); - __visc__bindIn(spmv_node, 8, 8, 0); - __visc__bindIn(spmv_node, 9, 9, 0); - __visc__bindIn(spmv_node, 10, 10, 0); - __visc__bindIn(spmv_node, 11, 11, 0); - __visc__bindIn(spmv_node, 12, 12, 0); - __visc__bindIn(spmv_node, 13, 13, 0); - __visc__bindIn(spmv_node, 14, 14, 0); - __visc__bindIn(spmv_node, 15, 15, 0); - __visc__bindIn(spmv_node, 16, 16, 0); + void *spmv_node = __hpvm__createNodeND(1, spmvLvl2, dim_X2); + __hpvm__bindIn(spmv_node, 0, 0, 0); + __hpvm__bindIn(spmv_node, 1, 1, 0); + __hpvm__bindIn(spmv_node, 2, 2, 0); + __hpvm__bindIn(spmv_node, 3, 3, 0); + __hpvm__bindIn(spmv_node, 4, 4, 0); + __hpvm__bindIn(spmv_node, 5, 5, 0); + __hpvm__bindIn(spmv_node, 6, 6, 0); + __hpvm__bindIn(spmv_node, 7, 7, 0); + __hpvm__bindIn(spmv_node, 8, 8, 0); + __hpvm__bindIn(spmv_node, 9, 9, 0); + __hpvm__bindIn(spmv_node, 10, 10, 0); + __hpvm__bindIn(spmv_node, 11, 11, 0); + __hpvm__bindIn(spmv_node, 12, 12, 0); + __hpvm__bindIn(spmv_node, 13, 13, 0); + __hpvm__bindIn(spmv_node, 14, 14, 0); + __hpvm__bindIn(spmv_node, 15, 15, 0); + __hpvm__bindIn(spmv_node, 16, 16, 0); } int main(int argc, char **argv) { @@ -261,7 +261,7 @@ int main(int argc, char **argv) { input_vec(parameters->inpFiles[1], h_x_vector, dim); pb_InitializeTimerSet(&timers); - __visc__init(); + __hpvm__init(); pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); memset(h_Ax_vector, 0, dim * sizeof(float)); @@ -271,14 +271,14 @@ int main(int argc, char **argv) { compute_active_thread(&block, &grid, nzcnt_len, pad, 3, 0, 8); - pb_SwitchToTimer(&timers, visc_TimerID_MEM_TRACK); - llvm_visc_track_mem(h_Ax_vector, dim * sizeof(float)); - llvm_visc_track_mem(h_data, len * sizeof(float)); - llvm_visc_track_mem(h_indices, len * sizeof(int)); - llvm_visc_track_mem(h_perm, dim * sizeof(int)); - 
llvm_visc_track_mem(h_x_vector, dim * sizeof(float)); - llvm_visc_track_mem(h_ptr, depth * sizeof(int)); - llvm_visc_track_mem(h_nzcnt, nzcnt_len * sizeof(int)); + pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_TRACK); + llvm_hpvm_track_mem(h_Ax_vector, dim * sizeof(float)); + llvm_hpvm_track_mem(h_data, len * sizeof(float)); + llvm_hpvm_track_mem(h_indices, len * sizeof(int)); + llvm_hpvm_track_mem(h_perm, dim * sizeof(int)); + llvm_hpvm_track_mem(h_x_vector, dim * sizeof(float)); + llvm_hpvm_track_mem(h_ptr, depth * sizeof(int)); + llvm_hpvm_track_mem(h_nzcnt, nzcnt_len * sizeof(int)); // main execution pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); @@ -306,9 +306,9 @@ int main(int argc, char **argv) { block, (grid / block)}; *(RootIn *)root_in = root_in_local; - void *spmvDFG = __visc__launch(0, spmvLvl3, root_in); + void *spmvDFG = __hpvm__launch(0, spmvLvl3, root_in); - __visc__wait(spmvDFG); + __hpvm__wait(spmvDFG); pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); /******************************* Issues ******************************* @@ -326,21 +326,21 @@ int main(int argc, char **argv) { // HtoD memory copy pb_SwitchToTimer(&timers, pb_TimerID_COPY); - llvm_visc_request_mem(h_Ax_vector, dim * sizeof(float)); + llvm_hpvm_request_mem(h_Ax_vector, dim * sizeof(float)); - pb_SwitchToTimer(&timers, visc_TimerID_MEM_UNTRACK); + pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK); - llvm_visc_untrack_mem(h_Ax_vector); - llvm_visc_untrack_mem(h_data); - llvm_visc_untrack_mem(h_indices); - llvm_visc_untrack_mem(h_perm); - llvm_visc_untrack_mem(h_x_vector); - llvm_visc_untrack_mem(h_ptr); - llvm_visc_untrack_mem(h_nzcnt); + llvm_hpvm_untrack_mem(h_Ax_vector); + llvm_hpvm_untrack_mem(h_data); + llvm_hpvm_untrack_mem(h_indices); + llvm_hpvm_untrack_mem(h_perm); + llvm_hpvm_untrack_mem(h_x_vector); + llvm_hpvm_untrack_mem(h_ptr); + llvm_hpvm_untrack_mem(h_nzcnt); pb_SwitchToTimer(&timers, pb_TimerID_NONE); pb_PrintTimerSet(&timers); - __visc__cleanup(); + __hpvm__cleanup(); 
if (parameters->outFile) { /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/ diff --git a/hpvm/test/parboil/benchmarks/spmv/src/visc/main.visc.ll.kernels.bc b/hpvm/test/parboil/benchmarks/spmv/src/visc/main.visc.ll.kernels.bc deleted file mode 100644 index b804d14d16cff805c0c1850d1f5079ab6e973ecf..0000000000000000000000000000000000000000 Binary files a/hpvm/test/parboil/benchmarks/spmv/src/visc/main.visc.ll.kernels.bc and /dev/null differ diff --git a/hpvm/test/parboil/benchmarks/spmv/src/visc/main.visc.ll.kernels.ll b/hpvm/test/parboil/benchmarks/spmv/src/visc/main.visc.ll.kernels.ll deleted file mode 100644 index 5604d70e8a005ee7e21c5ae9bf6dbf0dbac77d15..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/spmv/src/visc/main.visc.ll.kernels.ll +++ /dev/null @@ -1,138 +0,0 @@ -; ModuleID = 'build/visc_default/main.visc.ll.kernels.bc' -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024" -target triple = "spir64-unknown-unknown" - -%rtype = type {} - -; Function Attrs: optsize zeroext -define void @spmv_jds(float* %dst_vector, i64 %bytes_dst_vector, float* %d_data, i64 %bytes_d_data, i32* %d_index, i64 %bytes_d_index, i32* %d_perm, i64 %bytes_d_perm, float* %x_vec, i64 %bytes_x_vec, i32 %dim, i32* %jds_ptr_int, i64 %bytes_jds_ptr_int, i32* %sh_zcnt_int, i64 %bytes_sh_zcnt_int) #0 { -entry: - ;%0 = call i64 @_Z12get_group_idj(i32 0) - ;%1 = trunc i64 %0 to i32 - ;%2 = call i64 @_Z14get_local_sizej(i32 0) - ;%3 = trunc i64 %2 to i32 - ;%4 = mul i32 %1, %3 - ;%5 = call i64 @_Z12get_local_idj(i32 0) - ;%6 = trunc i64 %5 to i32 - ;%7 = add i32 %4, %6 - %0 = add i32 0, 0 - %1 = add i32 0, 0 - %2 = add i32 0, 0 - %3 = add i32 0, 0 - %4 = add i32 0, 0 - %5 = add i32 0, 0 - %6 = call i64 @_Z13get_global_idj(i32 0) - %7 = trunc i64 %6 to i32 - %cmp = icmp slt i32 %7, %dim - br i1 
%cmp, label %if.then, label %if.end38 - -if.then: ; preds = %entry - %shr = ashr i32 %7, 5 - %idxprom = sext i32 %shr to i64 - %arrayidx = getelementptr inbounds i32* %sh_zcnt_int, i64 %idxprom - %8 = load i32* %arrayidx, align 4, !tbaa !4 - %9 = load i32* %jds_ptr_int, align 4, !tbaa !4 - %add = add nsw i32 %9, %7 - %idxprom3 = sext i32 %add to i64 - %arrayidx4 = getelementptr inbounds float* %d_data, i64 %idxprom3 - %10 = load float* %arrayidx4, align 4, !tbaa !8 - %arrayidx6 = getelementptr inbounds i32* %d_index, i64 %idxprom3 - %11 = load i32* %arrayidx6, align 4, !tbaa !4 - %idxprom7 = sext i32 %11 to i64 - %arrayidx8 = getelementptr inbounds float* %x_vec, i64 %idxprom7 - %12 = load float* %arrayidx8, align 4, !tbaa !8 - %cmp9 = icmp sgt i32 %8, 1 - br i1 %cmp9, label %if.then10, label %if.end - -if.then10: ; preds = %if.then - %arrayidx11 = getelementptr inbounds i32* %jds_ptr_int, i64 1 - %.pn77 = load i32* %arrayidx11, align 4 - %idxprom13.pn.in78 = add nsw i32 %.pn77, %7 - %idxprom13.pn79 = sext i32 %idxprom13.pn.in78 to i64 - %i.0.in80 = getelementptr inbounds i32* %d_index, i64 %idxprom13.pn79 - %i.081 = load i32* %i.0.in80, align 4 - %cmp1582 = icmp sgt i32 %8, 2 - %arrayidx1783 = getelementptr inbounds float* %d_data, i64 %idxprom13.pn79 - %13 = load float* %arrayidx1783, align 4, !tbaa !8 - br i1 %cmp1582, label %for.body, label %for.end - -for.body: ; preds = %for.body, %if.then10 - %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 2, %if.then10 ] - %14 = phi float [ %16, %for.body ], [ %13, %if.then10 ] - %i.088 = phi i32 [ %i.0, %for.body ], [ %i.081, %if.then10 ] - %sum.086 = phi float [ %add25, %for.body ], [ 0.000000e+00, %if.then10 ] - %t.085 = phi float [ %15, %for.body ], [ %12, %if.then10 ] - %d.084 = phi float [ %14, %for.body ], [ %10, %if.then10 ] - %arrayidx19 = getelementptr inbounds i32* %jds_ptr_int, i64 %indvars.iv - %idxprom23 = sext i32 %i.088 to i64 - %arrayidx24 = getelementptr inbounds float* %x_vec, i64 %idxprom23 - 
%15 = load float* %arrayidx24, align 4, !tbaa !8 - %mul = fmul fast float %d.084, %t.085 - %add25 = fadd fast float %sum.086, %mul - %indvars.iv.next = add i64 %indvars.iv, 1 - %.pn = load i32* %arrayidx19, align 4 - %idxprom13.pn.in = add nsw i32 %.pn, %7 - %idxprom13.pn = sext i32 %idxprom13.pn.in to i64 - %i.0.in = getelementptr inbounds i32* %d_index, i64 %idxprom13.pn - %i.0 = load i32* %i.0.in, align 4 - %arrayidx17 = getelementptr inbounds float* %d_data, i64 %idxprom13.pn - %16 = load float* %arrayidx17, align 4, !tbaa !8 - %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %8 - br i1 %exitcond, label %for.end, label %for.body - -for.end: ; preds = %for.body, %if.then10 - %.lcssa = phi float [ %13, %if.then10 ], [ %16, %for.body ] - %i.0.lcssa = phi i32 [ %i.081, %if.then10 ], [ %i.0, %for.body ] - %sum.0.lcssa = phi float [ 0.000000e+00, %if.then10 ], [ %add25, %for.body ] - %t.0.lcssa = phi float [ %12, %if.then10 ], [ %15, %for.body ] - %d.0.lcssa = phi float [ %10, %if.then10 ], [ %14, %for.body ] - %idxprom28 = sext i32 %i.0.lcssa to i64 - %arrayidx29 = getelementptr inbounds float* %x_vec, i64 %idxprom28 - %17 = load float* %arrayidx29, align 4, !tbaa !8 - %mul30 = fmul fast float %d.0.lcssa, %t.0.lcssa - %add31 = fadd fast float %sum.0.lcssa, %mul30 - br label %if.end - -if.end: ; preds = %for.end, %if.then - %d.1 = phi float [ %.lcssa, %for.end ], [ %10, %if.then ] - %t.1 = phi float [ %17, %for.end ], [ %12, %if.then ] - %sum.1 = phi float [ %add31, %for.end ], [ 0.000000e+00, %if.then ] - %mul32 = fmul fast float %d.1, %t.1 - %add33 = fadd fast float %sum.1, %mul32 - %idxprom34 = sext i32 %7 to i64 - %arrayidx35 = getelementptr inbounds i32* %d_perm, i64 %idxprom34 - %18 = load i32* %arrayidx35, align 4, !tbaa !4 - %idxprom36 = sext i32 %18 to i64 - %arrayidx37 = getelementptr inbounds float* %dst_vector, i64 %idxprom36 - store float %add33, float* %arrayidx37, align 4, !tbaa !8 - br label %if.end38 - 
-if.end38: ; preds = %if.end, %entry - ret void -} - -declare i64 @_Z13get_global_idj(i32) - -declare i64 @_Z12get_group_idj(i32) - -declare i64 @_Z14get_local_sizej(i32) - -declare i64 @_Z12get_local_idj(i32) - -attributes #0 = { optsize zeroext "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" } - -!visc_hint_gpu = !{} -!visc_hint_cpu = !{!0, !1} -!opencl.kernels = !{!2} - -!0 = metadata !{%rtype (float*, i64, float*, i64, i32*, i64, i32*, i64, float*, i64, i32, i32*, i64, i32*, i64, i32)* undef} -!1 = metadata !{%rtype (float*, i64, float*, i64, i32*, i64, i32*, i64, float*, i64, i32, i32*, i64, i32*, i64, i32, i32)* undef} -!2 = metadata !{void (float*, i64, float*, i64, i32*, i64, i32*, i64, float*, i64, i32, i32*, i64, i32*, i64)* @spmv_jds, metadata !3} -!3 = metadata !{metadata !"kernel_arg_type", metadata !"float*", metadata !"i64", metadata !"float*", metadata !"i64", metadata !"i32*", metadata !"i64", metadata !"i32*", metadata !"i64", metadata !"float*", metadata !"i64", metadata !"i32", metadata !"i32*", metadata !"i64", metadata !"i32*", metadata !"i64"} -!4 = metadata !{metadata !5, metadata !5, i64 0} -!5 = metadata !{metadata !"int", metadata !6} -!6 = metadata !{metadata !"omnipotent char", metadata !7} -!7 = metadata !{metadata !"Simple C/C++ TBAA"} -!8 = metadata !{metadata !9, metadata !9, i64 0} -!9 = metadata !{metadata !"float", metadata !6} diff --git a/hpvm/test/parboil/benchmarks/stencil/Makefile b/hpvm/test/parboil/benchmarks/stencil/Makefile index f144c079ba27e9c2600139073c226fddd266da04..8412e4b2e8d370dc9266bd2765a2341512911f92 100644 --- a/hpvm/test/parboil/benchmarks/stencil/Makefile +++ b/hpvm/test/parboil/benchmarks/stencil/Makefile @@ -1,9 +1,9 @@ PARBOIL_ROOT = $(LLVM_SRC_ROOT)/tools/hpvm/test/parboil APP = stencil -# Default compile visc +# Default compile hpvm ifeq ($(VERSION),) 
- VERSION = visc + VERSION = hpvm endif # Default use small test case diff --git a/hpvm/test/parboil/benchmarks/stencil/src/visc/Makefile b/hpvm/test/parboil/benchmarks/stencil/src/hpvm/Makefile similarity index 80% rename from hpvm/test/parboil/benchmarks/stencil/src/visc/Makefile rename to hpvm/test/parboil/benchmarks/stencil/src/hpvm/Makefile index cf61fb3a6c77e07bf8ccc67902bd1a1997902763..35b36dcf3c053da03017c72d442204590675ecb4 100644 --- a/hpvm/test/parboil/benchmarks/stencil/src/visc/Makefile +++ b/hpvm/test/parboil/benchmarks/stencil/src/hpvm/Makefile @@ -1,8 +1,8 @@ # (c) 2010 The Board of Trustees of the University of Illinois. -LANGUAGE=visc +LANGUAGE=hpvm SRCDIR_OBJS=file.ll -VISC_OBJS=stencil.visc.ll +HPVM_OBJS=stencil.hpvm.ll APP_CUDALDFLAGS=-lm APP_CFLAGS=-ffast-math -O3 APP_CXXFLAGS=-ffast-math -O3 diff --git a/hpvm/test/parboil/benchmarks/stencil/src/visc/common.h b/hpvm/test/parboil/benchmarks/stencil/src/hpvm/common.h similarity index 100% rename from hpvm/test/parboil/benchmarks/stencil/src/visc/common.h rename to hpvm/test/parboil/benchmarks/stencil/src/hpvm/common.h diff --git a/hpvm/test/parboil/benchmarks/stencil/src/visc/file.cc b/hpvm/test/parboil/benchmarks/stencil/src/hpvm/file.cc similarity index 100% rename from hpvm/test/parboil/benchmarks/stencil/src/visc/file.cc rename to hpvm/test/parboil/benchmarks/stencil/src/hpvm/file.cc diff --git a/hpvm/test/parboil/benchmarks/stencil/src/visc/file.h b/hpvm/test/parboil/benchmarks/stencil/src/hpvm/file.h similarity index 100% rename from hpvm/test/parboil/benchmarks/stencil/src/visc/file.h rename to hpvm/test/parboil/benchmarks/stencil/src/hpvm/file.h diff --git a/hpvm/test/parboil/benchmarks/stencil/src/visc/kernel.cl b/hpvm/test/parboil/benchmarks/stencil/src/hpvm/kernel.cl similarity index 100% rename from hpvm/test/parboil/benchmarks/stencil/src/visc/kernel.cl rename to hpvm/test/parboil/benchmarks/stencil/src/hpvm/kernel.cl diff --git 
a/hpvm/test/parboil/benchmarks/stencil/src/visc/stencil.cpp b/hpvm/test/parboil/benchmarks/stencil/src/hpvm/stencil.cpp similarity index 66% rename from hpvm/test/parboil/benchmarks/stencil/src/visc/stencil.cpp rename to hpvm/test/parboil/benchmarks/stencil/src/hpvm/stencil.cpp index 5672a3ee490917d1374783eae5ab0ba1956ef441..e5810fc8101bef72dd4636b0b6c11826a8b18318 100644 --- a/hpvm/test/parboil/benchmarks/stencil/src/visc/stencil.cpp +++ b/hpvm/test/parboil/benchmarks/stencil/src/hpvm/stencil.cpp @@ -9,11 +9,11 @@ #include "common.h" #include "file.h" +#include <hpvm.h> #include <parboil.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <visc.h> static int read_data(float *A0, int nx, int ny, int nz, FILE *fp) { int s = 0; @@ -42,23 +42,23 @@ typedef struct __attribute__((__packed__)) { void naive_kernel(float c0, float c1, float *A0, size_t bytes_A0, float *Anext, size_t bytes_Anext, int nx, int ny, int nz) { - __visc__hint(visc::DEVICE); - __visc__attributes(2, A0, Anext, 1, Anext); + __hpvm__hint(hpvm::DEVICE); + __hpvm__attributes(2, A0, Anext, 1, Anext); - void *thisNode = __visc__getNode(); - void *parentNode = __visc__getParentNode(thisNode); + void *thisNode = __hpvm__getNode(); + void *parentNode = __hpvm__getParentNode(thisNode); - int lx = __visc__getNodeInstanceID_x(thisNode); - int ly = __visc__getNodeInstanceID_y(thisNode); - int lz = __visc__getNodeInstanceID_z(thisNode); + int lx = __hpvm__getNodeInstanceID_x(thisNode); + int ly = __hpvm__getNodeInstanceID_y(thisNode); + int lz = __hpvm__getNodeInstanceID_z(thisNode); - int gx = __visc__getNodeInstanceID_x(parentNode); - int gy = __visc__getNodeInstanceID_y(parentNode); - int gz = __visc__getNodeInstanceID_z(parentNode); + int gx = __hpvm__getNodeInstanceID_x(parentNode); + int gy = __hpvm__getNodeInstanceID_y(parentNode); + int gz = __hpvm__getNodeInstanceID_z(parentNode); - int gridx = __visc__getNumNodeInstances_x(thisNode); - int gridy = 
__visc__getNumNodeInstances_y(thisNode); - int gridz = __visc__getNumNodeInstances_z(thisNode); + int gridx = __hpvm__getNumNodeInstances_x(thisNode); + int gridy = __hpvm__getNumNodeInstances_y(thisNode); + int gridz = __hpvm__getNumNodeInstances_z(thisNode); int i = gx * gridx + lx + 1; int j = gy * gridy + ly + 1; @@ -78,65 +78,65 @@ void naive_kernel(float c0, float c1, float *A0, size_t bytes_A0, float *Anext, void stencilLvl1(float c0, float c1, float *A0, size_t bytes_A0, float *Anext, size_t bytes_Anext, int nx, int ny, int nz, size_t dim_X1, size_t dim_Y1, size_t dim_Z1) { - __visc__hint(visc::DEVICE); - __visc__attributes(2, A0, Anext, 1, Anext); + __hpvm__hint(hpvm::DEVICE); + __hpvm__attributes(2, A0, Anext, 1, Anext); void *stencil_node = - __visc__createNodeND(3, naive_kernel, dim_X1, dim_Y1, dim_Z1); - __visc__bindIn(stencil_node, 0, 0, 0); - __visc__bindIn(stencil_node, 1, 1, 0); - __visc__bindIn(stencil_node, 2, 2, 0); - __visc__bindIn(stencil_node, 3, 3, 0); - __visc__bindIn(stencil_node, 4, 4, 0); - __visc__bindIn(stencil_node, 5, 5, 0); - __visc__bindIn(stencil_node, 6, 6, 0); - __visc__bindIn(stencil_node, 7, 7, 0); - __visc__bindIn(stencil_node, 8, 8, 0); + __hpvm__createNodeND(3, naive_kernel, dim_X1, dim_Y1, dim_Z1); + __hpvm__bindIn(stencil_node, 0, 0, 0); + __hpvm__bindIn(stencil_node, 1, 1, 0); + __hpvm__bindIn(stencil_node, 2, 2, 0); + __hpvm__bindIn(stencil_node, 3, 3, 0); + __hpvm__bindIn(stencil_node, 4, 4, 0); + __hpvm__bindIn(stencil_node, 5, 5, 0); + __hpvm__bindIn(stencil_node, 6, 6, 0); + __hpvm__bindIn(stencil_node, 7, 7, 0); + __hpvm__bindIn(stencil_node, 8, 8, 0); } void stencilLvl2(float c0, float c1, float *A0, size_t bytes_A0, float *Anext, size_t bytes_Anext, int nx, int ny, int nz, size_t dim_X1, size_t dim_Y1, size_t dim_Z1, size_t dim_X2, size_t dim_Y2, size_t dim_Z2) { - __visc__hint(visc::CPU_TARGET); - __visc__attributes(2, A0, Anext, 1, Anext); + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(2, A0, Anext, 1, 
Anext); void *stencil_node = - __visc__createNodeND(3, stencilLvl1, dim_X2, dim_Y2, dim_Z2); - __visc__bindIn(stencil_node, 0, 0, 0); - __visc__bindIn(stencil_node, 1, 1, 0); - __visc__bindIn(stencil_node, 2, 2, 0); - __visc__bindIn(stencil_node, 3, 3, 0); - __visc__bindIn(stencil_node, 4, 4, 0); - __visc__bindIn(stencil_node, 5, 5, 0); - __visc__bindIn(stencil_node, 6, 6, 0); - __visc__bindIn(stencil_node, 7, 7, 0); - __visc__bindIn(stencil_node, 8, 8, 0); - __visc__bindIn(stencil_node, 9, 9, 0); - __visc__bindIn(stencil_node, 10, 10, 0); - __visc__bindIn(stencil_node, 11, 11, 0); + __hpvm__createNodeND(3, stencilLvl1, dim_X2, dim_Y2, dim_Z2); + __hpvm__bindIn(stencil_node, 0, 0, 0); + __hpvm__bindIn(stencil_node, 1, 1, 0); + __hpvm__bindIn(stencil_node, 2, 2, 0); + __hpvm__bindIn(stencil_node, 3, 3, 0); + __hpvm__bindIn(stencil_node, 4, 4, 0); + __hpvm__bindIn(stencil_node, 5, 5, 0); + __hpvm__bindIn(stencil_node, 6, 6, 0); + __hpvm__bindIn(stencil_node, 7, 7, 0); + __hpvm__bindIn(stencil_node, 8, 8, 0); + __hpvm__bindIn(stencil_node, 9, 9, 0); + __hpvm__bindIn(stencil_node, 10, 10, 0); + __hpvm__bindIn(stencil_node, 11, 11, 0); } void stencilLvl3(float c0, float c1, float *A0, size_t bytes_A0, float *Anext, size_t bytes_Anext, int nx, int ny, int nz, size_t dim_X1, size_t dim_Y1, size_t dim_Z1, size_t dim_X2, size_t dim_Y2, size_t dim_Z2) { - __visc__hint(visc::CPU_TARGET); - __visc__attributes(2, A0, Anext, 1, Anext); - void *stencil_node = __visc__createNodeND(0, stencilLvl2); - __visc__bindIn(stencil_node, 0, 0, 0); - __visc__bindIn(stencil_node, 1, 1, 0); - __visc__bindIn(stencil_node, 2, 2, 0); - __visc__bindIn(stencil_node, 3, 3, 0); - __visc__bindIn(stencil_node, 4, 4, 0); - __visc__bindIn(stencil_node, 5, 5, 0); - __visc__bindIn(stencil_node, 6, 6, 0); - __visc__bindIn(stencil_node, 7, 7, 0); - __visc__bindIn(stencil_node, 8, 8, 0); - __visc__bindIn(stencil_node, 9, 9, 0); - __visc__bindIn(stencil_node, 10, 10, 0); - __visc__bindIn(stencil_node, 11, 11, 
0); - __visc__bindIn(stencil_node, 12, 12, 0); - __visc__bindIn(stencil_node, 13, 13, 0); - __visc__bindIn(stencil_node, 14, 14, 0); + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(2, A0, Anext, 1, Anext); + void *stencil_node = __hpvm__createNodeND(0, stencilLvl2); + __hpvm__bindIn(stencil_node, 0, 0, 0); + __hpvm__bindIn(stencil_node, 1, 1, 0); + __hpvm__bindIn(stencil_node, 2, 2, 0); + __hpvm__bindIn(stencil_node, 3, 3, 0); + __hpvm__bindIn(stencil_node, 4, 4, 0); + __hpvm__bindIn(stencil_node, 5, 5, 0); + __hpvm__bindIn(stencil_node, 6, 6, 0); + __hpvm__bindIn(stencil_node, 7, 7, 0); + __hpvm__bindIn(stencil_node, 8, 8, 0); + __hpvm__bindIn(stencil_node, 9, 9, 0); + __hpvm__bindIn(stencil_node, 10, 10, 0); + __hpvm__bindIn(stencil_node, 11, 11, 0); + __hpvm__bindIn(stencil_node, 12, 12, 0); + __hpvm__bindIn(stencil_node, 13, 13, 0); + __hpvm__bindIn(stencil_node, 14, 14, 0); } int main(int argc, char **argv) { @@ -195,11 +195,11 @@ int main(int argc, char **argv) { fclose(fp); pb_InitializeTimerSet(&timers); - __visc__init(); + __hpvm__init(); - pb_SwitchToTimer(&timers, visc_TimerID_MEM_TRACK); - llvm_visc_track_mem(h_A0, sizeof(float) * size); - llvm_visc_track_mem(h_Anext, sizeof(float) * size); + pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_TRACK); + llvm_hpvm_track_mem(h_A0, sizeof(float) * size); + llvm_hpvm_track_mem(h_Anext, sizeof(float) * size); pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); @@ -241,9 +241,9 @@ int main(int argc, char **argv) { grid[1] / block[1], grid[2] / block[2]}; *(RootIn *)root_in = root_in_local; - void *stencilDFG = __visc__launch(0, stencilLvl3, root_in); + void *stencilDFG = __hpvm__launch(0, stencilLvl3, root_in); - __visc__wait(stencilDFG); + __hpvm__wait(stencilDFG); // printf("iteration %d\n",t); pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); float *h_temp = h_A0; @@ -255,19 +255,19 @@ int main(int argc, char **argv) { h_A0 = h_Anext; h_Anext = h_temp; pb_SwitchToTimer(&timers, pb_TimerID_COPY); - 
llvm_visc_request_mem(h_Anext, bytes); + llvm_hpvm_request_mem(h_Anext, bytes); printf("A[126,1,1] = %f\n", h_Anext[Index3D(nx, ny, 126, 1, 1)]); printf("A[125,1,1] = %f\n", h_Anext[Index3D(nx, ny, 125, 1, 1)]); - pb_SwitchToTimer(&timers, visc_TimerID_MEM_UNTRACK); + pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK); - llvm_visc_untrack_mem(h_A0); - llvm_visc_untrack_mem(h_Anext); + llvm_hpvm_untrack_mem(h_A0); + llvm_hpvm_untrack_mem(h_Anext); pb_SwitchToTimer(&timers, pb_TimerID_NONE); pb_PrintTimerSet(&timers); - __visc__cleanup(); + __hpvm__cleanup(); if (parameters->outFile) { /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/ diff --git a/hpvm/test/parboil/benchmarks/stencil/src/hpvm_vec/common.h b/hpvm/test/parboil/benchmarks/stencil/src/hpvm_vec/common.h new file mode 100644 index 0000000000000000000000000000000000000000..12a6d131c29067073fa79f09c4e6f91b8662969c --- /dev/null +++ b/hpvm/test/parboil/benchmarks/stencil/src/hpvm_vec/common.h @@ -0,0 +1,15 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#ifndef _COMMON_H_ +#define _COMMON_H_ +//#define Index3D(_nx,_ny,_i,_j,_k) ((_i)+_nx*((_j)+_ny*(_k))) +// +3 for padding +#define Index3D(_nx, _ny, _i, _j, _k) ((_i) + _nx * ((_j) + _ny * (_k)) + 3) +#define TCF 4 +#endif diff --git a/hpvm/test/parboil/benchmarks/stencil/src/hpvm_vec/stencil.c b/hpvm/test/parboil/benchmarks/stencil/src/hpvm_vec/stencil.c new file mode 100644 index 0000000000000000000000000000000000000000..35c5ed960c2031b0b84124bbdd1aeb95042625ee --- /dev/null +++ b/hpvm/test/parboil/benchmarks/stencil/src/hpvm_vec/stencil.c @@ -0,0 +1,176 @@ + +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr 
All Rights Reserved + *cr + ***************************************************************************/ + +#include "common.h" +#include "file.h" +#include <hpvm.h> +#include <parboil.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +static int read_data(float *A0, int nx, int ny, int nz, FILE *fp) { + int s = 0; + int i, j, k; + for (i = 0; i < nz; i++) { + for (j = 0; j < ny; j++) { + for (k = 0; k < nx; k++) { + fread(A0 + s, sizeof(float), 1, fp); + s++; + } + } + } + return 0; +} + +void naive_kernel(float c0, float c1, float *A0, float *Anext, int nx, int ny, + int nz) { + __hpvm__attributes(2, A0, Anext, 1, Anext); + int i = get_global_id(0) + 1; + int j = get_global_id(1) + 1; + int k = get_global_id(2) + 1; + + if (i < nx - 1) { + Anext[Index3D(nx, ny, i, j, k)] = c1 * (A0[Index3D(nx, ny, i, j, k + 1)] + + A0[Index3D(nx, ny, i, j, k - 1)] + + A0[Index3D(nx, ny, i, j + 1, k)] + + A0[Index3D(nx, ny, i, j - 1, k)] + + A0[Index3D(nx, ny, i + 1, j, k)] + + A0[Index3D(nx, ny, i - 1, j, k)]) - + A0[Index3D(nx, ny, i, j, k)] * c0; + } +} + +int main(int argc, char **argv) { + struct pb_TimerSet timers; + struct pb_Parameters *parameters; + + printf("OpenCL accelerated 7 points stencil codes****\n"); + printf("Author: Li-Wen Chang <lchang20@illinois.edu>\n"); + parameters = pb_ReadParameters(&argc, argv); + + /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/ + + // declaration + int nx, ny, nz; + size_t size; + int iteration; + float c0 = 1.0 / 6.0; + float c1 = 1.0 / 6.0 / 6.0; + + if (argc < 5) { + printf("Usage: probe nx ny nz t\n" + "nx: the grid size x\n" + "ny: the grid size y\n" + "nz: the grid size z\n" + "t: the iteration time\n"); + return -1; + } + + nx = atoi(argv[1]); + if (nx < 1) + return -1; + ny = atoi(argv[2]); + if (ny < 1) + return -1; + nz = atoi(argv[3]); + if (nz < 1) + return -1; + iteration = atoi(argv[4]); + if (iteration < 1) + return -1; + + // host data + float *h_A0; + float *h_Anext; + + // load data from files + + 
size = nx * ny * nz; + + // Padding in the beginning to get aligned loads and stores + size = size + 3; + + h_A0 = (float *)malloc(sizeof(float) * size); + h_Anext = (float *)malloc(sizeof(float) * size); + + /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/ + FILE *fp = fopen(parameters->inpFiles[0], "rb"); + read_data(h_A0 + 3, nx, ny, nz, fp); + fclose(fp); + + pb_InitializeTimerSet(&timers); + __hpvm__init(); + + pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_TRACK); + llvm_hpvm_track_mem(h_A0, sizeof(float) * size); + llvm_hpvm_track_mem(h_Anext, sizeof(float) * size); + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + memcpy(h_Anext, h_A0, sizeof(float) * size); + + // only use 1D thread block + int tx = 256 / TCF; + int block[3] = {tx, 1, 1}; + int grid[3] = {(nx - 2 + TCF * tx - 1) / (TCF * tx) * tx, ny - 2, nz - 2}; + // size_t grid[3] = {nx-2,ny-2,nz-2}; + size_t offset[3] = {1, 1, 1}; + + printf("grid(%d, %d, %d), block(%d, %d, %d)\n", grid[0], grid[1], grid[2], + block[0], block[1], block[2]); + // main execution + + int t; + size_t bytes = size * sizeof(float); + printf("A[126,1,1] = %f\n", h_A0[Index3D(nx, ny, 126, 1, 1)]); + printf("A[125,1,1] = %f\n", h_A0[Index3D(nx, ny, 125, 1, 1)]); + for (t = 0; t < iteration; t++) { + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + unsigned stencilDFG = __hpvm__node( + naive_kernel, 2, 3, block[0], block[1], block[2], grid[0] / block[0], + grid[1] / block[1], grid[2] / block[2], 9, (float)c0, (float)c1, h_A0, + bytes, h_Anext, bytes, nx, ny, nz, 0); + __hpvm__wait(stencilDFG); + // printf("iteration %d\n",t); + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + float *h_temp = h_A0; + h_A0 = h_Anext; + h_Anext = h_temp; + } + + float *h_temp = h_A0; + h_A0 = h_Anext; + h_Anext = h_temp; + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + llvm_hpvm_request_mem(h_Anext, bytes); + printf("A[126,1,1] = %f\n", h_Anext[Index3D(nx, ny, 126, 1, 1)]); + printf("A[125,1,1] = %f\n", h_Anext[Index3D(nx, ny, 125, 1, 1)]); + + 
pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK); + + llvm_hpvm_untrack_mem(h_A0); + llvm_hpvm_untrack_mem(h_Anext); + + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + pb_PrintTimerSet(&timers); + + __hpvm__cleanup(); + + if (parameters->outFile) { + /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/ + outputData(parameters->outFile, h_Anext + 3, nx, ny, nz); + } + /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/ + free(h_A0); + free(h_Anext); + pb_FreeParameters(parameters); + + return 0; +} diff --git a/hpvm/test/parboil/common/include/parboil.h b/hpvm/test/parboil/common/include/parboil.h index 30ad6721c3190610dd08ec131603b6fe622f897e..ba25726c027a5c67283c68a703216ad7ee785ef5 100644 --- a/hpvm/test/parboil/common/include/parboil.h +++ b/hpvm/test/parboil/common/include/parboil.h @@ -102,23 +102,23 @@ enum pb_TimerID { * host activity: automatically filled in, * not intended for direct usage */ // GPU FUNCTION - visc_TimerID_INIT_CTX, - visc_TimerID_CLEAR_CTX, - visc_TimerID_COPY_SCALAR, - visc_TimerID_COPY_PTR, - visc_TimerID_MEM_FREE, - visc_TimerID_READ_OUTPUT, - visc_TimerID_SETUP, - visc_TimerID_MEM_TRACK, - visc_TimerID_MEM_UNTRACK, - visc_TimerID_MISC, + hpvm_TimerID_INIT_CTX, + hpvm_TimerID_CLEAR_CTX, + hpvm_TimerID_COPY_SCALAR, + hpvm_TimerID_COPY_PTR, + hpvm_TimerID_MEM_FREE, + hpvm_TimerID_READ_OUTPUT, + hpvm_TimerID_SETUP, + hpvm_TimerID_MEM_TRACK, + hpvm_TimerID_MEM_UNTRACK, + hpvm_TimerID_MISC, // LAUNCH FUNCTION - visc_TimerID_PTHREAD_CREATE, - visc_TimerID_ARG_PACK, - visc_TimerID_ARG_UNPACK, - visc_TimerID_COMPUTATION, - visc_TimerID_OUTPUT_PACK, - visc_TimerID_OUTPUT_UNPACK, + hpvm_TimerID_PTHREAD_CREATE, + hpvm_TimerID_ARG_PACK, + hpvm_TimerID_ARG_UNPACK, + hpvm_TimerID_COMPUTATION, + hpvm_TimerID_OUTPUT_PACK, + hpvm_TimerID_OUTPUT_UNPACK, pb_TimerID_LAST /* Number of timer IDs */ }; diff --git a/hpvm/test/parboil/common/mk/visc.mk b/hpvm/test/parboil/common/mk/hpvm.mk similarity index 83% rename from hpvm/test/parboil/common/mk/visc.mk rename 
to hpvm/test/parboil/common/mk/hpvm.mk index 0a8984deeac5696557f4b6a220b4f0758f5aefcf..cbc4071be246517e9d0d70a7c5d220e04f48f427 100755 --- a/hpvm/test/parboil/common/mk/visc.mk +++ b/hpvm/test/parboil/common/mk/hpvm.mk @@ -9,37 +9,37 @@ CFLAGS=$(LANG_CFLAGS) $(PLATFORM_CFLAGS) $(APP_CFLAGS) CXXFLAGS=$(LANG_CXXFLAGS) $(PLATFORM_CXXFLAGS) $(APP_CXXFLAGS) LDFLAGS=$(LANG_LDFLAGS) $(PLATFORM_LDFLAGS) $(APP_LDFLAGS) -# VISC +# HPVM LIBCLC_LIB_PATH = $(LLVM_SRC_ROOT)/../libclc/built_libs -VISC_RT_PATH = $(LLVM_SRC_ROOT)/../build/tools/hpvm/projects/visc-rt +HPVM_RT_PATH = $(LLVM_SRC_ROOT)/../build/tools/hpvm/projects/hpvm-rt -VISC_RT_LIB = $(VISC_RT_PATH)/visc-rt.bc +HPVM_RT_LIB = $(HPVM_RT_PATH)/hpvm-rt.bc #LIBCLC_NVPTX_LIB = $(LIBCLC_LIB_PATH)/nvptx--nvidiacl.bc LIBCLC_NVPTX_LIB = $(LIBCLC_LIB_PATH)/nvptx64--nvidiacl.bc #LIBCLC_NVPTX_LIB = nvptx64--nvidiacl.bc LLVM_34_AS = /opt/llvm/bin/llvm-as -TESTGEN_OPTFLAGS = -load LLVMGenVISC.so -genvisc -globaldce +TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce KERNEL_GEN_FLAGS = -O3 -target nvptx64-nvidia-nvcl ifeq ($(TARGET),x86) DEVICE = SPIR_TARGET - VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_SPIR.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-spir -dfg2llvm-x86 -clearDFG + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_SPIR.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-spir -dfg2llvm-x86 -clearDFG CFLAGS += -DOPENCL_CPU else ifeq ($(TARGET),seq) DEVICE = CPU_TARGET - VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG else ifeq ($(TARGET),seqx86) DEVICE = CPU_OR_SPIR_TARGET - VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_SPIR.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem 
-dfg2llvm-spir -dfg2llvm-x86 -clearDFG + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_SPIR.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-spir -dfg2llvm-x86 -clearDFG CFLAGS += -DOPENCL_CPU else ifeq ($(TARGET),seqgpu) DEVICE = CPU_OR_GPU_TARGET - VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG else DEVICE = GPU_TARGET - VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG endif CFLAGS += -DDEVICE=$(DEVICE) @@ -48,31 +48,31 @@ CXXFLAGS += -DDEVICE=$(DEVICE) HOST_LINKFLAGS = ifeq ($(TIMER),x86) - VISC_OPTFLAGS += -visc-timers-x86 + HPVM_OPTFLAGS += -hpvm-timers-x86 else ifeq ($(TIMER),ptx) - VISC_OPTFLAGS += -visc-timers-ptx + HPVM_OPTFLAGS += -hpvm-timers-ptx else ifeq ($(TIMER),gen) - TESTGEN_OPTFLAGS += -visc-timers-gen + TESTGEN_OPTFLAGS += -hpvm-timers-gen else ifeq ($(TIMER),spir) - TESTGEN_OPTFLAGS += -visc-timers-spir + TESTGEN_OPTFLAGS += -hpvm-timers-spir else ifeq ($(TIMER),no) else ifeq ($(TARGET),x86) - VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-spir + HPVM_OPTFLAGS += -hpvm-timers-x86 -hpvm-timers-spir else ifeq ($(TARGET),seq) - VISC_OPTFLAGS += -visc-timers-x86 + HPVM_OPTFLAGS += -hpvm-timers-x86 else ifeq ($(TARGET),seqx86) - VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-spir + HPVM_OPTFLAGS += -hpvm-timers-x86 -hpvm-timers-spir else ifeq 
($(TARGET),seqgpu) - VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-ptx + HPVM_OPTFLAGS += -hpvm-timers-x86 -hpvm-timers-ptx else - VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-ptx + HPVM_OPTFLAGS += -hpvm-timers-x86 -hpvm-timers-ptx endif - TESTGEN_OPTFLAGS += -visc-timers-gen + TESTGEN_OPTFLAGS += -hpvm-timers-gen endif ifeq ($(DABSTRACTION),true) - VISC_OPTFLAGS += -visc-eda + HPVM_OPTFLAGS += -hpvm-eda endif # Rules common to all makefiles @@ -120,7 +120,7 @@ endif ######################################## OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS)) -TEST_OBJS = $(call INBUILDDIR,$(VISC_OBJS)) +TEST_OBJS = $(call INBUILDDIR,$(HPVM_OBJS)) PARBOIL_OBJS = $(call INBUILDDIR,parboil.ll) KERNEL = $(TEST_OBJS).kernels.ll KERNEL_OPT = $(BUILDDIR)/$(APP).kernels.opt.ll @@ -181,11 +181,11 @@ $(KERNEL_OPT) : $(KERNEL) $(BIN) : $(HOST_LINKED) $(CXX) -O3 $(LDFLAGS) $< -o $@ -$(HOST_LINKED) : $(HOST) $(OBJS) $(BUILDDIR)/parboil.ll $(VISC_RT_LIB) +$(HOST_LINKED) : $(HOST) $(OBJS) $(BUILDDIR)/parboil.ll $(HPVM_RT_LIB) $(LLVM_LINK) $^ -S -o $@ -$(HOST) $(KERNEL): $(BUILDDIR)/$(VISC_OBJS) - $(OPT) $(VISC_OPTFLAGS) -S $< -o $(HOST) +$(HOST) $(KERNEL): $(BUILDDIR)/$(HPVM_OBJS) + $(OPT) $(HPVM_OPTFLAGS) -S $< -o $(HOST) $(RUNDIR) : mkdir -p $(RUNDIR) @@ -202,7 +202,7 @@ $(BUILDDIR)/%.ll : $(SRCDIR)/%.cc $(BUILDDIR)/%.ll : $(SRCDIR)/%.cpp $(CXX) $(CXXFLAGS) -S -emit-llvm $< -o $@ -$(BUILDDIR)/%.visc.ll: $(BUILDDIR)/%.ll +$(BUILDDIR)/%.hpvm.ll: $(BUILDDIR)/%.ll $(OPT) $(TESTGEN_OPTFLAGS) $< -S -o $@ $(BUILDDIR)/%.o : $(SRCDIR)/%.c diff --git a/hpvm/test/parboil/common/platform/visc.default.mk b/hpvm/test/parboil/common/platform/hpvm.default.mk similarity index 61% rename from hpvm/test/parboil/common/platform/visc.default.mk rename to hpvm/test/parboil/common/platform/hpvm.default.mk index 03a9b0874aa2b2617afab71b27470b97f5b1f4b0..ca90d453a38d0b63d16e850b57de5622cbd1f2e1 100644 --- a/hpvm/test/parboil/common/platform/visc.default.mk +++ 
b/hpvm/test/parboil/common/platform/hpvm.default.mk @@ -12,20 +12,20 @@ #OPENCL_LIB_PATH=$(OPENCL_PATH)/lib/x86_64 #build -VISC_BUILD_DIR = $(LLVM_SRC_ROOT)/../build +HPVM_BUILD_DIR = $(LLVM_SRC_ROOT)/../build # gcc (default) -CC = $(VISC_BUILD_DIR)/bin/clang -OCLBE = $(VISC_BUILD_DIR)/bin/llvm-cbe -PLATFORM_CFLAGS = -I$(LLVM_SRC_ROOT)/include -I$(VISC_BUILD_DIR)/include -I../../../include +CC = $(HPVM_BUILD_DIR)/bin/clang +OCLBE = $(HPVM_BUILD_DIR)/bin/llvm-cbe +PLATFORM_CFLAGS = -I$(LLVM_SRC_ROOT)/include -I$(HPVM_BUILD_DIR)/include -I../../../include -CXX = $(VISC_BUILD_DIR)/bin/clang++ -PLATFORM_CXXFLAGS = -I$(LLVM_SRC_ROOT)/include -I$(VISC_BUILD_DIR)/include -I../../../include +CXX = $(HPVM_BUILD_DIR)/bin/clang++ +PLATFORM_CXXFLAGS = -I$(LLVM_SRC_ROOT)/include -I$(HPVM_BUILD_DIR)/include -I../../../include -LINKER = $(VISC_BUILD_DIR)/bin/clang++ +LINKER = $(HPVM_BUILD_DIR)/bin/clang++ PLATFORM_LDFLAGS = -lm -lpthread -lOpenCL -LLVM_LIB_PATH = $(VISC_BUILD_DIR)/lib -LLVM_BIN_PATH = $(VISC_BUILD_DIR)/bin +LLVM_LIB_PATH = $(HPVM_BUILD_DIR)/lib +LLVM_BIN_PATH = $(HPVM_BUILD_DIR)/bin OPT = $(LLVM_BIN_PATH)/opt LLVM_LINK = $(LLVM_BIN_PATH)/llvm-link diff --git a/hpvm/test/pipeline/Makefile b/hpvm/test/pipeline/Makefile index e3572ecdfc4322ecd12c25517880b87f94c0f9e1..c9a17c1634ab39b79ec903e889fcb8492eef0848 100644 --- a/hpvm/test/pipeline/Makefile +++ b/hpvm/test/pipeline/Makefile @@ -23,12 +23,12 @@ CURRENT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) EXE = pipeline-$(TARGET) INCLUDES += -I$(SRC_DIR) -I$(CAM_PIPE_SRC_DIR) -INCLUDES += -I$(LLVM_SRC_ROOT)/include -I../include -I$(VISC_BUILD_DIR)/include +INCLUDES += -I$(LLVM_SRC_ROOT)/include -I../include -I$(HPVM_BUILD_DIR)/include ## BEGIN HPVM MAKEFILE SRCDIR_OBJS= io.ll OBJS_SRC=src/io.cc -VISC_OBJS=main.visc.ll +HPVM_OBJS=main.hpvm.ll APP = $(EXE) APP_CFLAGS += $(INCLUDES) -ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize APP_CXXFLAGS += $(INCLUDES) -ffast-math -O3 
-fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize @@ -39,21 +39,21 @@ OBJS_CFLAGS = $(APP_CFLAGS) $(PLATFORM_CFLAGS) CXXFLAGS = $(APP_CXXFLAGS) $(PLATFORM_CXXFLAGS) LDFLAGS= $(APP_LDFLAGS) $(PLATFORM_LDFLAGS) -VISC_RT_PATH = $(LLVM_BUILD_DIR)/tools/hpvm/projects/visc-rt -VISC_RT_LIB = $(VISC_RT_PATH)/visc-rt.bc +HPVM_RT_PATH = $(LLVM_SRC_ROOT)/tools/hpvm/projects/hpvm-rt +HPVM_RT_LIB = $(HPVM_RT_PATH)/hpvm-rt.bc -TESTGEN_OPTFLAGS = -load LLVMGenVISC.so -genvisc -globaldce +TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce ifeq ($(TARGET),seq) DEVICE = CPU_TARGET - VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG - VISC_OPTFLAGS += -visc-timers-x86 + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG + HPVM_OPTFLAGS += -hpvm-timers-x86 else DEVICE = GPU_TARGET - VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG - VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-ptx + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG + HPVM_OPTFLAGS += -hpvm-timers-x86 -hpvm-timers-ptx endif - TESTGEN_OPTFLAGS += -visc-timers-gen + TESTGEN_OPTFLAGS += -hpvm-timers-gen CFLAGS += -DDEVICE=$(DEVICE) CXXFLAGS += -DDEVICE=$(DEVICE) @@ -64,7 +64,7 @@ INBUILDDIR=$(addprefix $(BUILD_DIR)/,$(1)) .PRECIOUS: $(BUILD_DIR)/%.ll OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS)) -TEST_OBJS = $(call INBUILDDIR,$(VISC_OBJS)) +TEST_OBJS = $(call INBUILDDIR,$(HPVM_OBJS)) KERNEL = $(TEST_OBJS).kernels.ll ifeq ($(TARGET),seq) @@ -91,11 +91,11 @@ $(KERNEL_OCL) : $(KERNEL) $(EXE) : $(HOST_LINKED) $(CXX) -O3 $(LDFLAGS) $< -o $@ -$(HOST_LINKED) : $(HOST) $(OBJS) $(VISC_RT_LIB) +$(HOST_LINKED) : $(HOST) $(OBJS) 
$(HPVM_RT_LIB) $(LLVM_LINK) $^ -S -o $@ -$(HOST) $(KERNEL): $(BUILD_DIR)/$(VISC_OBJS) - $(OPT) -debug $(VISC_OPTFLAGS) -S $< -o $(HOST) +$(HOST) $(KERNEL): $(BUILD_DIR)/$(HPVM_OBJS) + $(OPT) -debug $(HPVM_OPTFLAGS) -S $< -o $(HOST) $(BUILD_DIR): mkdir -p $(BUILD_DIR) @@ -106,7 +106,7 @@ $(BUILD_DIR)/%.ll : $(SRC_DIR)/%.cc $(BUILD_DIR)/main.ll : $(SRC_DIR)/main.cc $(CC) $(CXXFLAGS) -emit-llvm -S -o $@ $< -$(BUILD_DIR)/main.visc.ll : $(BUILD_DIR)/main.ll - $(OPT) -debug-only=genvisc $(TESTGEN_OPTFLAGS) $< -S -o $@ +$(BUILD_DIR)/main.hpvm.ll : $(BUILD_DIR)/main.ll + $(OPT) -debug-only=genhpvm $(TESTGEN_OPTFLAGS) $< -S -o $@ ## END HPVM MAKEFILE diff --git a/hpvm/test/pipeline/copyToVersions.sh b/hpvm/test/pipeline/copyToVersions.sh index 3b9c19bad6dd86de7eb9a82edc7f17b92265155e..67551aff2f1b47fb2ad9c69be44936e8145a68da 100755 --- a/hpvm/test/pipeline/copyToVersions.sh +++ b/hpvm/test/pipeline/copyToVersions.sh @@ -1,12 +1,12 @@ -declare -a versionList=("viscGPU" "viscVector" "viscScalar" "viscGPU-Scalar-MaxG" "viscVector-Scalar-MaxG" "viscGPU-Scalar-ZC" "viscVector-Scalar-ZC") +declare -a versionList=("hpvmGPU" "hpvmVector" "hpvmScalar" "hpvmGPU-Scalar-MaxG" "hpvmVector-Scalar-MaxG" "hpvmGPU-Scalar-ZC" "hpvmVector-Scalar-ZC") declare -a fileList=("Makefile" "io.cc" "main.cc") for version in "${versionList[@]}"; do echo $version for filename in "${fileList[@]}"; do - echo cp ./src/visc_parallel/$filename ./src/$version/ - cp ./src/visc_parallel/$filename ./src/$version/ + echo cp ./src/hpvm_parallel/$filename ./src/$version/ + cp ./src/hpvm_parallel/$filename ./src/$version/ done echo done diff --git a/hpvm/test/pipeline/gradient.visc.merged.experiments.notimer.ll b/hpvm/test/pipeline/gradient.hpvm.merged.experiments.notimer.ll similarity index 95% rename from hpvm/test/pipeline/gradient.visc.merged.experiments.notimer.ll rename to hpvm/test/pipeline/gradient.hpvm.merged.experiments.notimer.ll index 
06ec055bb746c7cc0cd58f75ed1f8090e0afa459..8056cc12eed0e4d20d45e294bf674dfc689f6bb8 100644 --- a/hpvm/test/pipeline/gradient.visc.merged.experiments.notimer.ll +++ b/hpvm/test/pipeline/gradient.hpvm.merged.experiments.notimer.ll @@ -1,4 +1,4 @@ -; ModuleID = 'build/Gradient_default/main.visc.ll' +; ModuleID = 'build/Gradient_default/main.hpvm.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -167,9 +167,9 @@ entry: ; Function Attrs: nounwind uwtable define %emptyStruct @squareRoot(float* nocapture in %Gx, i64 %bytesGx, float* nocapture in %Gy, i64 %bytesGy, float* nocapture out %G, i64 %bytesG, i32 %m, i32 %n, i32 %dummyH, i32 %dummyV) #2 { entry: - %call3 = tail call i8* @llvm.visc.getNode() - %call14 = tail call i32 @llvm.visc.getNodeInstanceID.x(i8* %call3) - %call25 = tail call i32 @llvm.visc.getNodeInstanceID.y(i8* %call3) + %call3 = tail call i8* @llvm.hpvm.getNode() + %call14 = tail call i32 @llvm.hpvm.getNodeInstanceID.x(i8* %call3) + %call25 = tail call i32 @llvm.hpvm.getNodeInstanceID.y(i8* %call3) %cmp = icmp slt i32 %call14, %n %cmp3 = icmp slt i32 %call25, %m %or.cond = and i1 %cmp, %cmp3 @@ -198,51 +198,51 @@ if.end: ; preds = %if.then, %entry ; Function Attrs: nounwind uwtable define %emptyStruct.23 @WrapperSquareRoot(float* nocapture in %Gx, i64 %bytesGx, float* nocapture in %Gy, i64 %bytesGy, float* nocapture out %G, i64 %bytesG, i32 %m, i32 %n, i32 %dummyH, i32 %dummyV) #2 { entry: - %squareRoot.node = tail call i8* @llvm.visc.createNode2D(i8* bitcast (%emptyStruct (float*, i64, float*, i64, float*, i64, i32, i32, i32, i32)* @squareRoot to i8*), i32 %m, i32 %n) - tail call void @llvm.visc.bind.input(i8* %squareRoot.node, i32 0, i32 0, i1 false) - tail call void @llvm.visc.bind.input(i8* %squareRoot.node, i32 1, i32 1, i1 false) - tail call void @llvm.visc.bind.input(i8* 
%squareRoot.node, i32 2, i32 2, i1 false) - tail call void @llvm.visc.bind.input(i8* %squareRoot.node, i32 3, i32 3, i1 false) - tail call void @llvm.visc.bind.input(i8* %squareRoot.node, i32 4, i32 4, i1 false) - tail call void @llvm.visc.bind.input(i8* %squareRoot.node, i32 5, i32 5, i1 false) - tail call void @llvm.visc.bind.input(i8* %squareRoot.node, i32 6, i32 6, i1 false) - tail call void @llvm.visc.bind.input(i8* %squareRoot.node, i32 7, i32 7, i1 false) - tail call void @llvm.visc.bind.input(i8* %squareRoot.node, i32 8, i32 8, i1 false) - tail call void @llvm.visc.bind.input(i8* %squareRoot.node, i32 9, i32 9, i1 false) + %squareRoot.node = tail call i8* @llvm.hpvm.createNode2D(i8* bitcast (%emptyStruct (float*, i64, float*, i64, float*, i64, i32, i32, i32, i32)* @squareRoot to i8*), i32 %m, i32 %n) + tail call void @llvm.hpvm.bind.input(i8* %squareRoot.node, i32 0, i32 0, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %squareRoot.node, i32 1, i32 1, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %squareRoot.node, i32 2, i32 2, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %squareRoot.node, i32 3, i32 3, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %squareRoot.node, i32 4, i32 4, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %squareRoot.node, i32 5, i32 5, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %squareRoot.node, i32 6, i32 6, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %squareRoot.node, i32 7, i32 7, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %squareRoot.node, i32 8, i32 8, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %squareRoot.node, i32 9, i32 9, i1 false) ret %emptyStruct.23 undef } ; Function Attrs: nounwind uwtable define %emptyStruct.24 @Gradient(float* nocapture in %Is, i64 %bytesIs, float* nocapture in %Sx, i64 %bytesSx, float* nocapture in %Sy, i64 %bytesSy, float* nocapture out %Gx, i64 %bytesGx, float* nocapture out %Gy, i64 %bytesGy, float* nocapture out %G, i64 
%bytesG, i32 %m, i32 %n) #2 { entry: - %WrapperHorizontal_WrapperVertical.node = tail call i8* @llvm.visc.createNode(i8* bitcast (%WrapperHorizontal.WrapperVertical.ty (float*, i64, float*, i64, float*, i64, i32, i32, float*, i64, float*, i64, float*, i64, i32, i32)* @WrapperHorizontal_WrapperVertical to i8*)) - %WrapperSquareRoot.node = tail call i8* @llvm.visc.createNode(i8* bitcast (%emptyStruct.23 (float*, i64, float*, i64, float*, i64, i32, i32, i32, i32)* @WrapperSquareRoot to i8*)) - tail call void @llvm.visc.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 0, i32 0, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 1, i32 1, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 2, i32 2, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 3, i32 3, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 6, i32 4, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 7, i32 5, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 12, i32 6, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 13, i32 7, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 0, i32 8, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 1, i32 9, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 4, i32 10, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 5, i32 11, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 8, i32 12, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 9, i32 
13, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 12, i32 14, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 13, i32 15, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperSquareRoot.node, i32 6, i32 0, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperSquareRoot.node, i32 7, i32 1, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperSquareRoot.node, i32 8, i32 2, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperSquareRoot.node, i32 9, i32 3, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperSquareRoot.node, i32 10, i32 4, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperSquareRoot.node, i32 11, i32 5, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperSquareRoot.node, i32 12, i32 6, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperSquareRoot.node, i32 13, i32 7, i1 false) - %output.repl = tail call i8* @llvm.visc.createEdge(i8* %WrapperHorizontal_WrapperVertical.node, i8* %WrapperSquareRoot.node, i1 false, i32 0, i32 8, i1 false) - %output1.repl = tail call i8* @llvm.visc.createEdge(i8* %WrapperHorizontal_WrapperVertical.node, i8* %WrapperSquareRoot.node, i1 false, i32 1, i32 9, i1 false) + %WrapperHorizontal_WrapperVertical.node = tail call i8* @llvm.hpvm.createNode(i8* bitcast (%WrapperHorizontal.WrapperVertical.ty (float*, i64, float*, i64, float*, i64, i32, i32, float*, i64, float*, i64, float*, i64, i32, i32)* @WrapperHorizontal_WrapperVertical to i8*)) + %WrapperSquareRoot.node = tail call i8* @llvm.hpvm.createNode(i8* bitcast (%emptyStruct.23 (float*, i64, float*, i64, float*, i64, i32, i32, i32, i32)* @WrapperSquareRoot to i8*)) + tail call void @llvm.hpvm.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 0, i32 0, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 1, i32 1, i1 false) + tail call void 
@llvm.hpvm.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 2, i32 2, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 3, i32 3, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 6, i32 4, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 7, i32 5, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 12, i32 6, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 13, i32 7, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 0, i32 8, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 1, i32 9, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 4, i32 10, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 5, i32 11, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 8, i32 12, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 9, i32 13, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 12, i32 14, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperHorizontal_WrapperVertical.node, i32 13, i32 15, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperSquareRoot.node, i32 6, i32 0, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperSquareRoot.node, i32 7, i32 1, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperSquareRoot.node, i32 8, i32 2, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperSquareRoot.node, i32 9, i32 3, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperSquareRoot.node, i32 10, i32 4, i1 false) + tail call void 
@llvm.hpvm.bind.input(i8* %WrapperSquareRoot.node, i32 11, i32 5, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperSquareRoot.node, i32 12, i32 6, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperSquareRoot.node, i32 13, i32 7, i1 false) + %output.repl = tail call i8* @llvm.hpvm.createEdge(i8* %WrapperHorizontal_WrapperVertical.node, i8* %WrapperSquareRoot.node, i1 false, i32 0, i32 8, i1 false) + %output1.repl = tail call i8* @llvm.hpvm.createEdge(i8* %WrapperHorizontal_WrapperVertical.node, i8* %WrapperSquareRoot.node, i1 false, i32 1, i32 9, i1 false) ret %emptyStruct.24 undef } @@ -866,7 +866,7 @@ cond.false: ; preds = %land.lhs.true58, %l cond.end: ; preds = %land.lhs.true58 call void @pb_InitializeTimerSet(%struct.pb_TimerSet* %timers) #1 - call void @llvm.visc.init() + call void @llvm.hpvm.init() %103 = load i32** %p.i.i.i.i, align 8, !tbaa !5 %104 = load i32* %103, align 4, !tbaa !9 %arrayidx.i296 = getelementptr inbounds i32* %103, i64 1 @@ -1137,15 +1137,15 @@ cond.false87: ; preds = %_Z12getNextFrameRN2 unreachable cond.end88: ; preds = %_Z12getNextFrameRN2cv12VideoCaptureERNS_3MatE.exit335 - call void @llvm_visc_track_mem(i8* %150, i64 %mul65) #1 - call void @llvm_visc_track_mem(i8* %106, i64 36) #1 - call void @llvm_visc_track_mem(i8* %113, i64 36) #1 + call void @llvm_hpvm_track_mem(i8* %150, i64 %mul65) #1 + call void @llvm_hpvm_track_mem(i8* %106, i64 36) #1 + call void @llvm_hpvm_track_mem(i8* %113, i64 36) #1 %176 = load i8** %data73, align 8, !tbaa !5 - call void @llvm_visc_track_mem(i8* %176, i64 %mul65) #1 + call void @llvm_hpvm_track_mem(i8* %176, i64 %mul65) #1 %177 = load i8** %data74, align 8, !tbaa !5 - call void @llvm_visc_track_mem(i8* %177, i64 %mul65) #1 + call void @llvm_hpvm_track_mem(i8* %177, i64 %mul65) #1 %178 = load i8** %data75, align 8, !tbaa !5 - call void @llvm_visc_track_mem(i8* %178, i64 %mul65) #1 + call void @llvm_hpvm_track_mem(i8* %178, i64 %mul65) #1 %179 = load i8** %data, align 8, !tbaa !5 
%180 = bitcast i8* %179 to float* store float* %180, float** %I1.i, align 1, !tbaa !5 @@ -1154,8 +1154,8 @@ cond.end88: ; preds = %_Z12getNextFrameRN2 for.body: ; preds = %for.body, %cond.end88 %j.0480 = phi i32 [ 0, %cond.end88 ], [ %inc, %for.body ] - %graphID = call i8* @llvm.visc.launch(i8* bitcast (%emptyStruct.24 (float*, i64, float*, i64, float*, i64, float*, i64, float*, i64, float*, i64, i32, i32)* @Gradient to i8*), i8* %call66, i1 false) - call void @llvm.visc.wait(i8* %graphID) + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%emptyStruct.24 (float*, i64, float*, i64, float*, i64, float*, i64, float*, i64, float*, i64, i32, i32)* @Gradient to i8*), i8* %call66, i1 false) + call void @llvm.hpvm.wait(i8* %graphID) %inc = add i32 %j.0480, 1 %exitcond = icmp eq i32 %inc, 2994 br i1 %exitcond, label %for.end, label %for.body @@ -1163,19 +1163,19 @@ for.body: ; preds = %for.body, %cond.end for.end: ; preds = %for.body call void @pb_SwitchToTimer(%struct.pb_TimerSet* %timers, i32 0) #1 %181 = load i8** %data75, align 8, !tbaa !5 - call void @llvm_visc_request_mem(i8* %181, i64 %mul65) #1 + call void @llvm_hpvm_request_mem(i8* %181, i64 %mul65) #1 %182 = load i8** %data, align 8, !tbaa !5 - call void @llvm_visc_untrack_mem(i8* %182) #1 - call void @llvm_visc_untrack_mem(i8* %106) #1 - call void @llvm_visc_untrack_mem(i8* %113) #1 + call void @llvm_hpvm_untrack_mem(i8* %182) #1 + call void @llvm_hpvm_untrack_mem(i8* %106) #1 + call void @llvm_hpvm_untrack_mem(i8* %113) #1 %183 = load i8** %data73, align 8, !tbaa !5 - call void @llvm_visc_untrack_mem(i8* %183) #1 + call void @llvm_hpvm_untrack_mem(i8* %183) #1 %184 = load i8** %data74, align 8, !tbaa !5 - call void @llvm_visc_untrack_mem(i8* %184) #1 + call void @llvm_hpvm_untrack_mem(i8* %184) #1 %185 = load i8** %data75, align 8, !tbaa !5 - call void @llvm_visc_untrack_mem(i8* %185) #1 + call void @llvm_hpvm_untrack_mem(i8* %185) #1 call void @pb_PrintTimerSet(%struct.pb_TimerSet* %timers) #1 - call void 
@llvm.visc.cleanup() + call void @llvm.hpvm.cleanup() call void @pb_FreeParameters(%struct.pb_Parameters* %call3) #1 %u.i.i.i342 = getelementptr inbounds %"class.cv::Mat"* %out, i64 0, i32 9 %186 = load %"struct.cv::UMatData"** %u.i.i.i342, align 8, !tbaa !5 @@ -1647,13 +1647,13 @@ declare noalias i8* @malloc(i64) #5 declare void @_ZN2cv12VideoCaptureD1Ev(%"class.cv::VideoCapture"*) #0 -declare void @llvm_visc_track_mem(i8*, i64) #0 +declare void @llvm_hpvm_track_mem(i8*, i64) #0 declare void @pb_SwitchToTimer(%struct.pb_TimerSet*, i32) #0 -declare void @llvm_visc_request_mem(i8*, i64) #0 +declare void @llvm_hpvm_request_mem(i8*, i64) #0 -declare void @llvm_visc_untrack_mem(i8*) #0 +declare void @llvm_hpvm_untrack_mem(i8*) #0 declare void @pb_PrintTimerSet(%struct.pb_TimerSet*) #0 @@ -1713,50 +1713,50 @@ entry: declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #1 ; Function Attrs: nounwind readnone -declare i8* @llvm.visc.getNode() #7 +declare i8* @llvm.hpvm.getNode() #7 ; Function Attrs: nounwind readnone -declare i32 @llvm.visc.getNodeInstanceID.x(i8*) #7 +declare i32 @llvm.hpvm.getNodeInstanceID.x(i8*) #7 ; Function Attrs: nounwind readnone -declare i32 @llvm.visc.getNodeInstanceID.y(i8*) #7 +declare i32 @llvm.hpvm.getNodeInstanceID.y(i8*) #7 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode2D(i8*, i32, i32) #1 +declare i8* @llvm.hpvm.createNode2D(i8*, i32, i32) #1 ; Function Attrs: nounwind -declare void @llvm.visc.bind.input(i8*, i32, i32, i1) #1 +declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #1 ; Function Attrs: nounwind -declare void @llvm.visc.bind.output(i8*, i32, i32, i1) #1 +declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #1 ; Function Attrs: nounwind readonly declare float @llvm.sqrt.f32(float) #8 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode(i8*) #1 +declare i8* @llvm.hpvm.createNode(i8*) #1 ; Function Attrs: nounwind -declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32, i1) #1 
+declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #1 ; Function Attrs: nounwind -declare void @llvm.visc.init() #1 +declare void @llvm.hpvm.init() #1 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i1) #1 +declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #1 ; Function Attrs: nounwind -declare void @llvm.visc.wait(i8*) #1 +declare void @llvm.hpvm.wait(i8*) #1 ; Function Attrs: nounwind -declare void @llvm.visc.cleanup() #1 +declare void @llvm.hpvm.cleanup() #1 ; Function Attrs: nounwind define %horizontal.vertical.ty @horizontal_vertical(float* nocapture in %n1_Is, i64 %n1_bytesIs, float* nocapture in %n1_Sx, i64 %n1_bytesSx, float* nocapture out %n1_Gx, i64 %n1_bytesGx, i32 %n1_m, i32 %n1_n, float* nocapture in %n2_Is, i64 %n2_bytesIs, float* nocapture in %n2_Sy, i64 %n2_bytesSy, float* nocapture out %n2_Gy, i64 %n2_bytesGy, i32 %n2_m, i32 %n2_n) #1 { entry: - %call3.i = tail call i8* @llvm.visc.getNode() #1 - %call14.i = tail call i32 @llvm.visc.getNodeInstanceID.x(i8* %call3.i) #1 - %call25.i = tail call i32 @llvm.visc.getNodeInstanceID.y(i8* %call3.i) #1 + %call3.i = tail call i8* @llvm.hpvm.getNode() #1 + %call14.i = tail call i32 @llvm.hpvm.getNodeInstanceID.x(i8* %call3.i) #1 + %call25.i = tail call i32 @llvm.hpvm.getNodeInstanceID.y(i8* %call3.i) #1 %mul.i = mul nsw i32 %call25.i, %n1_n %add.i = add nsw i32 %mul.i, %call14.i %cmp.i = icmp slt i32 %call14.i, %n1_n @@ -2139,25 +2139,25 @@ vertical.exit: ; preds = %if.end42.2.i67.us, ; Function Attrs: nounwind define %WrapperHorizontal.WrapperVertical.ty @WrapperHorizontal_WrapperVertical(float* nocapture in %n1_Is, i64 %n1_bytesIs, float* nocapture in %n1_Sx, i64 %n1_bytesSx, float* nocapture out %n1_Gx, i64 %n1_bytesGx, i32 %n1_m, i32 %n1_n, float* nocapture in %n2_Is, i64 %n2_bytesIs, float* nocapture in %n2_Sy, i64 %n2_bytesSy, float* nocapture out %n2_Gy, i64 %n2_bytesGy, i32 %n2_m, i32 %n2_n) #1 { entry: - %horizontal_vertical.node = tail call i8* @llvm.visc.createNode2D(i8* 
bitcast (%horizontal.vertical.ty (float*, i64, float*, i64, float*, i64, i32, i32, float*, i64, float*, i64, float*, i64, i32, i32)* @horizontal_vertical to i8*), i32 %n1_m, i32 %n1_n) - tail call void @llvm.visc.bind.output(i8* %horizontal_vertical.node, i32 0, i32 0, i1 false) - tail call void @llvm.visc.bind.input(i8* %horizontal_vertical.node, i32 7, i32 7, i1 false) - tail call void @llvm.visc.bind.input(i8* %horizontal_vertical.node, i32 6, i32 6, i1 false) - tail call void @llvm.visc.bind.input(i8* %horizontal_vertical.node, i32 5, i32 5, i1 false) - tail call void @llvm.visc.bind.input(i8* %horizontal_vertical.node, i32 4, i32 4, i1 false) - tail call void @llvm.visc.bind.input(i8* %horizontal_vertical.node, i32 3, i32 3, i1 false) - tail call void @llvm.visc.bind.input(i8* %horizontal_vertical.node, i32 2, i32 2, i1 false) - tail call void @llvm.visc.bind.input(i8* %horizontal_vertical.node, i32 1, i32 1, i1 false) - tail call void @llvm.visc.bind.input(i8* %horizontal_vertical.node, i32 0, i32 0, i1 false) - tail call void @llvm.visc.bind.output(i8* %horizontal_vertical.node, i32 1, i32 1, i1 false) - tail call void @llvm.visc.bind.input(i8* %horizontal_vertical.node, i32 15, i32 15, i1 false) - tail call void @llvm.visc.bind.input(i8* %horizontal_vertical.node, i32 14, i32 14, i1 false) - tail call void @llvm.visc.bind.input(i8* %horizontal_vertical.node, i32 13, i32 13, i1 false) - tail call void @llvm.visc.bind.input(i8* %horizontal_vertical.node, i32 12, i32 12, i1 false) - tail call void @llvm.visc.bind.input(i8* %horizontal_vertical.node, i32 11, i32 11, i1 false) - tail call void @llvm.visc.bind.input(i8* %horizontal_vertical.node, i32 10, i32 10, i1 false) - tail call void @llvm.visc.bind.input(i8* %horizontal_vertical.node, i32 9, i32 9, i1 false) - tail call void @llvm.visc.bind.input(i8* %horizontal_vertical.node, i32 8, i32 8, i1 false) + %horizontal_vertical.node = tail call i8* @llvm.hpvm.createNode2D(i8* bitcast (%horizontal.vertical.ty 
(float*, i64, float*, i64, float*, i64, i32, i32, float*, i64, float*, i64, float*, i64, i32, i32)* @horizontal_vertical to i8*), i32 %n1_m, i32 %n1_n) + tail call void @llvm.hpvm.bind.output(i8* %horizontal_vertical.node, i32 0, i32 0, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %horizontal_vertical.node, i32 7, i32 7, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %horizontal_vertical.node, i32 6, i32 6, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %horizontal_vertical.node, i32 5, i32 5, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %horizontal_vertical.node, i32 4, i32 4, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %horizontal_vertical.node, i32 3, i32 3, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %horizontal_vertical.node, i32 2, i32 2, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %horizontal_vertical.node, i32 1, i32 1, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %horizontal_vertical.node, i32 0, i32 0, i1 false) + tail call void @llvm.hpvm.bind.output(i8* %horizontal_vertical.node, i32 1, i32 1, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %horizontal_vertical.node, i32 15, i32 15, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %horizontal_vertical.node, i32 14, i32 14, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %horizontal_vertical.node, i32 13, i32 13, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %horizontal_vertical.node, i32 12, i32 12, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %horizontal_vertical.node, i32 11, i32 11, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %horizontal_vertical.node, i32 10, i32 10, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %horizontal_vertical.node, i32 9, i32 9, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %horizontal_vertical.node, i32 8, i32 8, i1 false) ret %WrapperHorizontal.WrapperVertical.ty undef } @@ -2172,9 +2172,9 @@ attributes #7 = { nounwind readnone } attributes #8 = { nounwind 
readonly } attributes #9 = { noreturn nounwind } -!visc_hint_gpu = !{!0, !1} -!visc_hint_cpu = !{!2, !3, !4} -!visc_hint_spir = !{} +!hpvm_hint_gpu = !{!0, !1} +!hpvm_hint_cpu = !{!2, !3, !4} +!hpvm_hint_spir = !{} !0 = metadata !{%emptyStruct (float*, i64, float*, i64, float*, i64, i32, i32, i32, i32)* @squareRoot} !1 = metadata !{%horizontal.vertical.ty (float*, i64, float*, i64, float*, i64, i32, i32, float*, i64, float*, i64, float*, i64, i32, i32)* @horizontal_vertical} diff --git a/hpvm/test/pipeline/laplacian.visc.merged.experiments.notimer.ll b/hpvm/test/pipeline/laplacian.hpvm.merged.experiments.notimer.ll similarity index 95% rename from hpvm/test/pipeline/laplacian.visc.merged.experiments.notimer.ll rename to hpvm/test/pipeline/laplacian.hpvm.merged.experiments.notimer.ll index 4b0458625157e1c6535941ec5c663f8a16660c22..aa4a0d19a0ec80910b8d82b03de018ad41470a22 100644 --- a/hpvm/test/pipeline/laplacian.visc.merged.experiments.notimer.ll +++ b/hpvm/test/pipeline/laplacian.hpvm.merged.experiments.notimer.ll @@ -1,4 +1,4 @@ -; ModuleID = 'build/Laplacian_default/main.visc.ll' +; ModuleID = 'build/Laplacian_default/main.hpvm.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -170,9 +170,9 @@ declare void @llvm.lifetime.end(i64, i8* nocapture) #1 ; Function Attrs: nounwind uwtable define %emptyStruct @lincomb(float* nocapture in %Is, i64 %bytesIs, float* nocapture in %D, i64 %bytesD, float* nocapture in %E, i64 %bytesE, float* nocapture out %L, i64 %bytesL, i32 %m, i32 %n, i32 %dummyD, i32 %dummyE) #2 { entry: - %call3 = tail call i8* @llvm.visc.getNode() - %call14 = tail call i32 @llvm.visc.getNodeInstanceID.x(i8* %call3) - %call25 = tail call i32 @llvm.visc.getNodeInstanceID.y(i8* %call3) + %call3 = tail call i8* @llvm.hpvm.getNode() + %call14 = tail call i32 @llvm.hpvm.getNodeInstanceID.x(i8* 
%call3) + %call25 = tail call i32 @llvm.hpvm.getNodeInstanceID.y(i8* %call3) %cmp = icmp slt i32 %call14, %n %cmp3 = icmp slt i32 %call25, %m %or.cond = and i1 %cmp, %cmp3 @@ -202,55 +202,55 @@ if.end: ; preds = %if.then, %entry ; Function Attrs: nounwind uwtable define %emptyStruct.23 @WrapperLincomb(float* nocapture in %Is, i64 %bytesIs, float* nocapture in %D, i64 %bytesD, float* nocapture in %E, i64 %bytesE, float* nocapture out %L, i64 %bytesL, i32 %m, i32 %n, i32 %dummyD, i32 %dummyE) #2 { entry: - %lincomb.node = tail call i8* @llvm.visc.createNode2D(i8* bitcast (%emptyStruct (float*, i64, float*, i64, float*, i64, float*, i64, i32, i32, i32, i32)* @lincomb to i8*), i32 %m, i32 %n) - tail call void @llvm.visc.bind.input(i8* %lincomb.node, i32 0, i32 0, i1 false) - tail call void @llvm.visc.bind.input(i8* %lincomb.node, i32 1, i32 1, i1 false) - tail call void @llvm.visc.bind.input(i8* %lincomb.node, i32 2, i32 2, i1 false) - tail call void @llvm.visc.bind.input(i8* %lincomb.node, i32 3, i32 3, i1 false) - tail call void @llvm.visc.bind.input(i8* %lincomb.node, i32 4, i32 4, i1 false) - tail call void @llvm.visc.bind.input(i8* %lincomb.node, i32 5, i32 5, i1 false) - tail call void @llvm.visc.bind.input(i8* %lincomb.node, i32 6, i32 6, i1 false) - tail call void @llvm.visc.bind.input(i8* %lincomb.node, i32 7, i32 7, i1 false) - tail call void @llvm.visc.bind.input(i8* %lincomb.node, i32 8, i32 8, i1 false) - tail call void @llvm.visc.bind.input(i8* %lincomb.node, i32 9, i32 9, i1 false) - tail call void @llvm.visc.bind.input(i8* %lincomb.node, i32 10, i32 10, i1 false) - tail call void @llvm.visc.bind.input(i8* %lincomb.node, i32 11, i32 11, i1 false) + %lincomb.node = tail call i8* @llvm.hpvm.createNode2D(i8* bitcast (%emptyStruct (float*, i64, float*, i64, float*, i64, float*, i64, i32, i32, i32, i32)* @lincomb to i8*), i32 %m, i32 %n) + tail call void @llvm.hpvm.bind.input(i8* %lincomb.node, i32 0, i32 0, i1 false) + tail call void 
@llvm.hpvm.bind.input(i8* %lincomb.node, i32 1, i32 1, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %lincomb.node, i32 2, i32 2, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %lincomb.node, i32 3, i32 3, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %lincomb.node, i32 4, i32 4, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %lincomb.node, i32 5, i32 5, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %lincomb.node, i32 6, i32 6, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %lincomb.node, i32 7, i32 7, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %lincomb.node, i32 8, i32 8, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %lincomb.node, i32 9, i32 9, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %lincomb.node, i32 10, i32 10, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %lincomb.node, i32 11, i32 11, i1 false) ret %emptyStruct.23 undef } ; Function Attrs: nounwind uwtable define %emptyStruct.24 @LaplacianEstimate(float* nocapture in %Is, i64 %bytesIs, float* nocapture in %B, i64 %bytesB, float* nocapture out %D, i64 %bytesD, float* nocapture out %E, i64 %bytesE, float* nocapture out %L, i64 %bytesL, i32 %m, i32 %n) #2 { entry: - %WrapperDilate_WrapperErode.node = tail call i8* @llvm.visc.createNode(i8* bitcast (%WrapperDilate.WrapperErode.ty (float*, i64, float*, i64, float*, i64, i32, i32, float*, i64, float*, i64, float*, i64, i32, i32)* @WrapperDilate_WrapperErode to i8*)) - %WrapperLincomb.node = tail call i8* @llvm.visc.createNode(i8* bitcast (%emptyStruct.23 (float*, i64, float*, i64, float*, i64, float*, i64, i32, i32, i32, i32)* @WrapperLincomb to i8*)) - tail call void @llvm.visc.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 0, i32 0, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 1, i32 1, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 2, i32 2, i1 false) - tail call void 
@llvm.visc.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 3, i32 3, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 4, i32 4, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 5, i32 5, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 10, i32 6, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 11, i32 7, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 0, i32 8, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 1, i32 9, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 2, i32 10, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 3, i32 11, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 6, i32 12, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 7, i32 13, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 10, i32 14, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 11, i32 15, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperLincomb.node, i32 0, i32 0, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperLincomb.node, i32 1, i32 1, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperLincomb.node, i32 4, i32 2, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperLincomb.node, i32 5, i32 3, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperLincomb.node, i32 6, i32 4, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperLincomb.node, i32 7, i32 5, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperLincomb.node, i32 8, i32 6, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperLincomb.node, i32 
9, i32 7, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperLincomb.node, i32 10, i32 8, i1 false) - tail call void @llvm.visc.bind.input(i8* %WrapperLincomb.node, i32 11, i32 9, i1 false) - %output.repl = tail call i8* @llvm.visc.createEdge(i8* %WrapperDilate_WrapperErode.node, i8* %WrapperLincomb.node, i1 false, i32 0, i32 10, i1 false) - %output1.repl = tail call i8* @llvm.visc.createEdge(i8* %WrapperDilate_WrapperErode.node, i8* %WrapperLincomb.node, i1 false, i32 1, i32 11, i1 false) + %WrapperDilate_WrapperErode.node = tail call i8* @llvm.hpvm.createNode(i8* bitcast (%WrapperDilate.WrapperErode.ty (float*, i64, float*, i64, float*, i64, i32, i32, float*, i64, float*, i64, float*, i64, i32, i32)* @WrapperDilate_WrapperErode to i8*)) + %WrapperLincomb.node = tail call i8* @llvm.hpvm.createNode(i8* bitcast (%emptyStruct.23 (float*, i64, float*, i64, float*, i64, float*, i64, i32, i32, i32, i32)* @WrapperLincomb to i8*)) + tail call void @llvm.hpvm.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 0, i32 0, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 1, i32 1, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 2, i32 2, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 3, i32 3, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 4, i32 4, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 5, i32 5, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 10, i32 6, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 11, i32 7, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 0, i32 8, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 1, i32 9, i1 false) + tail call void 
@llvm.hpvm.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 2, i32 10, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 3, i32 11, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 6, i32 12, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 7, i32 13, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 10, i32 14, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperDilate_WrapperErode.node, i32 11, i32 15, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperLincomb.node, i32 0, i32 0, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperLincomb.node, i32 1, i32 1, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperLincomb.node, i32 4, i32 2, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperLincomb.node, i32 5, i32 3, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperLincomb.node, i32 6, i32 4, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperLincomb.node, i32 7, i32 5, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperLincomb.node, i32 8, i32 6, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperLincomb.node, i32 9, i32 7, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperLincomb.node, i32 10, i32 8, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %WrapperLincomb.node, i32 11, i32 9, i1 false) + %output.repl = tail call i8* @llvm.hpvm.createEdge(i8* %WrapperDilate_WrapperErode.node, i8* %WrapperLincomb.node, i1 false, i32 0, i32 10, i1 false) + %output1.repl = tail call i8* @llvm.hpvm.createEdge(i8* %WrapperDilate_WrapperErode.node, i8* %WrapperLincomb.node, i1 false, i32 1, i32 11, i1 false) ret %emptyStruct.24 undef } @@ -873,7 +873,7 @@ cond.false: ; preds = %land.lhs.true58, %l cond.end: ; preds = %land.lhs.true58 call void @pb_InitializeTimerSet(%struct.pb_TimerSet* %timers) #1 - call void 
@llvm.visc.init() + call void @llvm.hpvm.init() %103 = load i32** %p.i.i.i.i, align 8, !tbaa !5 %104 = load i32* %103, align 4, !tbaa !9 %arrayidx.i290 = getelementptr inbounds i32* %103, i64 1 @@ -1062,18 +1062,18 @@ _Z12getNextFrameRN2cv12VideoCaptureERNS_3MatE.exit332: ; preds = %if.then.i328, call void @llvm.lifetime.end(i64 24, i8* %134) #1 %data = getelementptr inbounds %"class.cv::Mat"* %src, i64 0, i32 4 %139 = load i8** %data, align 8, !tbaa !5 - call void @llvm_visc_track_mem(i8* %139, i64 %mul65) #1 + call void @llvm_hpvm_track_mem(i8* %139, i64 %mul65) #1 %arraydecay = getelementptr inbounds [9 x float]* %B, i64 0, i64 0 - call void @llvm_visc_track_mem(i8* %106, i64 36) #1 + call void @llvm_hpvm_track_mem(i8* %106, i64 36) #1 %data81 = getelementptr inbounds %"class.cv::Mat"* %D, i64 0, i32 4 %140 = load i8** %data81, align 8, !tbaa !5 - call void @llvm_visc_track_mem(i8* %140, i64 %mul65) #1 + call void @llvm_hpvm_track_mem(i8* %140, i64 %mul65) #1 %data82 = getelementptr inbounds %"class.cv::Mat"* %E, i64 0, i32 4 %141 = load i8** %data82, align 8, !tbaa !5 - call void @llvm_visc_track_mem(i8* %141, i64 %mul65) #1 + call void @llvm_hpvm_track_mem(i8* %141, i64 %mul65) #1 %data83 = getelementptr inbounds %"class.cv::Mat"* %L, i64 0, i32 4 %142 = load i8** %data83, align 8, !tbaa !5 - call void @llvm_visc_track_mem(i8* %142, i64 %mul65) #1 + call void @llvm_hpvm_track_mem(i8* %142, i64 %mul65) #1 %143 = load i8** %data, align 8, !tbaa !5 %144 = bitcast i8* %143 to float* %145 = load i8** %data81, align 8, !tbaa !5 @@ -1126,8 +1126,8 @@ _Z12getNextFrameRN2cv12VideoCaptureERNS_3MatE.exit332: ; preds = %if.then.i328, for.body: ; preds = %for.body, %_Z12getNextFrameRN2cv12VideoCaptureERNS_3MatE.exit332 %j.0474 = phi i32 [ 0, %_Z12getNextFrameRN2cv12VideoCaptureERNS_3MatE.exit332 ], [ %inc, %for.body ] - %graphID = call i8* @llvm.visc.launch(i8* bitcast (%emptyStruct.24 (float*, i64, float*, i64, float*, i64, float*, i64, float*, i64, i32, i32)* 
@LaplacianEstimate to i8*), i8* %call66, i1 false) - call void @llvm.visc.wait(i8* %graphID) + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%emptyStruct.24 (float*, i64, float*, i64, float*, i64, float*, i64, float*, i64, i32, i32)* @LaplacianEstimate to i8*), i8* %call66, i1 false) + call void @llvm.hpvm.wait(i8* %graphID) %inc = add nsw i32 %j.0474, 1 %exitcond = icmp eq i32 %inc, 2994 br i1 %exitcond, label %for.end, label %for.body @@ -1135,18 +1135,18 @@ for.body: ; preds = %for.body, %_Z12getN for.end: ; preds = %for.body call void @pb_SwitchToTimer(%struct.pb_TimerSet* %timers, i32 0) #1 %165 = load i8** %data83, align 8, !tbaa !5 - call void @llvm_visc_request_mem(i8* %165, i64 %mul65) #1 + call void @llvm_hpvm_request_mem(i8* %165, i64 %mul65) #1 %166 = load i8** %data, align 8, !tbaa !5 - call void @llvm_visc_untrack_mem(i8* %166) #1 - call void @llvm_visc_untrack_mem(i8* %106) #1 + call void @llvm_hpvm_untrack_mem(i8* %166) #1 + call void @llvm_hpvm_untrack_mem(i8* %106) #1 %167 = load i8** %data81, align 8, !tbaa !5 - call void @llvm_visc_untrack_mem(i8* %167) #1 + call void @llvm_hpvm_untrack_mem(i8* %167) #1 %168 = load i8** %data82, align 8, !tbaa !5 - call void @llvm_visc_untrack_mem(i8* %168) #1 + call void @llvm_hpvm_untrack_mem(i8* %168) #1 %169 = load i8** %data83, align 8, !tbaa !5 - call void @llvm_visc_untrack_mem(i8* %169) #1 + call void @llvm_hpvm_untrack_mem(i8* %169) #1 call void @pb_PrintTimerSet(%struct.pb_TimerSet* %timers) #1 - call void @llvm.visc.cleanup() + call void @llvm.hpvm.cleanup() call void @pb_FreeParameters(%struct.pb_Parameters* %call3) #1 %u.i.i.i336 = getelementptr inbounds %"class.cv::Mat"* %out, i64 0, i32 9 %170 = load %"struct.cv::UMatData"** %u.i.i.i336, align 8, !tbaa !5 @@ -1614,13 +1614,13 @@ declare noalias i8* @malloc(i64) #5 declare void @_ZN2cv12VideoCaptureD1Ev(%"class.cv::VideoCapture"*) #0 -declare void @llvm_visc_track_mem(i8*, i64) #0 +declare void @llvm_hpvm_track_mem(i8*, i64) #0 declare void 
@pb_SwitchToTimer(%struct.pb_TimerSet*, i32) #0 -declare void @llvm_visc_request_mem(i8*, i64) #0 +declare void @llvm_hpvm_request_mem(i8*, i64) #0 -declare void @llvm_visc_untrack_mem(i8*) #0 +declare void @llvm_hpvm_untrack_mem(i8*) #0 declare void @pb_PrintTimerSet(%struct.pb_TimerSet*) #0 @@ -1677,47 +1677,47 @@ declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #1 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1 ; Function Attrs: nounwind readnone -declare i8* @llvm.visc.getNode() #7 +declare i8* @llvm.hpvm.getNode() #7 ; Function Attrs: nounwind readnone -declare i32 @llvm.visc.getNodeInstanceID.x(i8*) #7 +declare i32 @llvm.hpvm.getNodeInstanceID.x(i8*) #7 ; Function Attrs: nounwind readnone -declare i32 @llvm.visc.getNodeInstanceID.y(i8*) #7 +declare i32 @llvm.hpvm.getNodeInstanceID.y(i8*) #7 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode2D(i8*, i32, i32) #1 +declare i8* @llvm.hpvm.createNode2D(i8*, i32, i32) #1 ; Function Attrs: nounwind -declare void @llvm.visc.bind.input(i8*, i32, i32, i1) #1 +declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #1 ; Function Attrs: nounwind -declare void @llvm.visc.bind.output(i8*, i32, i32, i1) #1 +declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #1 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode(i8*) #1 +declare i8* @llvm.hpvm.createNode(i8*) #1 ; Function Attrs: nounwind -declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32, i1) #1 +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #1 ; Function Attrs: nounwind -declare void @llvm.visc.init() #1 +declare void @llvm.hpvm.init() #1 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i1) #1 +declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #1 ; Function Attrs: nounwind -declare void @llvm.visc.wait(i8*) #1 +declare void @llvm.hpvm.wait(i8*) #1 ; Function Attrs: nounwind -declare void @llvm.visc.cleanup() #1 +declare void @llvm.hpvm.cleanup() #1 ; Function Attrs: 
nounwind define %dilate.erode.ty @dilate_erode(float* nocapture in %n1_Is, i64 %n1_bytesIs, float* nocapture in %n1_B, i64 %n1_bytesB, float* nocapture out %n1_D, i64 %n1_bytesD, i32 %n1_m, i32 %n1_n, float* nocapture in %n2_Is, i64 %n2_bytesIs, float* nocapture in %n2_B, i64 %n2_bytesB, float* nocapture out %n2_E, i64 %n2_bytesE, i32 %n2_m, i32 %n2_n) #1 { entry: - %call3.i = tail call i8* @llvm.visc.getNode() #1 - %call14.i = tail call i32 @llvm.visc.getNodeInstanceID.x(i8* %call3.i) #1 - %call25.i = tail call i32 @llvm.visc.getNodeInstanceID.y(i8* %call3.i) #1 + %call3.i = tail call i8* @llvm.hpvm.getNode() #1 + %call14.i = tail call i32 @llvm.hpvm.getNodeInstanceID.x(i8* %call3.i) #1 + %call25.i = tail call i32 @llvm.hpvm.getNodeInstanceID.y(i8* %call3.i) #1 %cmp.i = icmp slt i32 %call14.i, %n1_n %cmp3.i = icmp slt i32 %call25.i, %n1_m %or.cond.i = and i1 %cmp.i, %cmp3.i @@ -2070,25 +2070,25 @@ erode.exit: ; preds = %dilate.exit, %cond. ; Function Attrs: nounwind define %WrapperDilate.WrapperErode.ty @WrapperDilate_WrapperErode(float* nocapture in %n1_Is, i64 %n1_bytesIs, float* nocapture in %n1_B, i64 %n1_bytesB, float* nocapture out %n1_D, i64 %n1_bytesD, i32 %n1_m, i32 %n1_n, float* nocapture in %n2_Is, i64 %n2_bytesIs, float* nocapture in %n2_B, i64 %n2_bytesB, float* nocapture out %n2_E, i64 %n2_bytesE, i32 %n2_m, i32 %n2_n) #1 { entry: - %dilate_erode.node = tail call i8* @llvm.visc.createNode2D(i8* bitcast (%dilate.erode.ty (float*, i64, float*, i64, float*, i64, i32, i32, float*, i64, float*, i64, float*, i64, i32, i32)* @dilate_erode to i8*), i32 %n1_m, i32 %n1_n) - tail call void @llvm.visc.bind.output(i8* %dilate_erode.node, i32 0, i32 0, i1 false) - tail call void @llvm.visc.bind.input(i8* %dilate_erode.node, i32 7, i32 7, i1 false) - tail call void @llvm.visc.bind.input(i8* %dilate_erode.node, i32 6, i32 6, i1 false) - tail call void @llvm.visc.bind.input(i8* %dilate_erode.node, i32 5, i32 5, i1 false) - tail call void @llvm.visc.bind.input(i8* 
%dilate_erode.node, i32 4, i32 4, i1 false) - tail call void @llvm.visc.bind.input(i8* %dilate_erode.node, i32 3, i32 3, i1 false) - tail call void @llvm.visc.bind.input(i8* %dilate_erode.node, i32 2, i32 2, i1 false) - tail call void @llvm.visc.bind.input(i8* %dilate_erode.node, i32 1, i32 1, i1 false) - tail call void @llvm.visc.bind.input(i8* %dilate_erode.node, i32 0, i32 0, i1 false) - tail call void @llvm.visc.bind.output(i8* %dilate_erode.node, i32 1, i32 1, i1 false) - tail call void @llvm.visc.bind.input(i8* %dilate_erode.node, i32 15, i32 15, i1 false) - tail call void @llvm.visc.bind.input(i8* %dilate_erode.node, i32 14, i32 14, i1 false) - tail call void @llvm.visc.bind.input(i8* %dilate_erode.node, i32 13, i32 13, i1 false) - tail call void @llvm.visc.bind.input(i8* %dilate_erode.node, i32 12, i32 12, i1 false) - tail call void @llvm.visc.bind.input(i8* %dilate_erode.node, i32 11, i32 11, i1 false) - tail call void @llvm.visc.bind.input(i8* %dilate_erode.node, i32 10, i32 10, i1 false) - tail call void @llvm.visc.bind.input(i8* %dilate_erode.node, i32 9, i32 9, i1 false) - tail call void @llvm.visc.bind.input(i8* %dilate_erode.node, i32 8, i32 8, i1 false) + %dilate_erode.node = tail call i8* @llvm.hpvm.createNode2D(i8* bitcast (%dilate.erode.ty (float*, i64, float*, i64, float*, i64, i32, i32, float*, i64, float*, i64, float*, i64, i32, i32)* @dilate_erode to i8*), i32 %n1_m, i32 %n1_n) + tail call void @llvm.hpvm.bind.output(i8* %dilate_erode.node, i32 0, i32 0, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %dilate_erode.node, i32 7, i32 7, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %dilate_erode.node, i32 6, i32 6, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %dilate_erode.node, i32 5, i32 5, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %dilate_erode.node, i32 4, i32 4, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %dilate_erode.node, i32 3, i32 3, i1 false) + tail call void @llvm.hpvm.bind.input(i8* 
%dilate_erode.node, i32 2, i32 2, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %dilate_erode.node, i32 1, i32 1, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %dilate_erode.node, i32 0, i32 0, i1 false) + tail call void @llvm.hpvm.bind.output(i8* %dilate_erode.node, i32 1, i32 1, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %dilate_erode.node, i32 15, i32 15, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %dilate_erode.node, i32 14, i32 14, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %dilate_erode.node, i32 13, i32 13, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %dilate_erode.node, i32 12, i32 12, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %dilate_erode.node, i32 11, i32 11, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %dilate_erode.node, i32 10, i32 10, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %dilate_erode.node, i32 9, i32 9, i1 false) + tail call void @llvm.hpvm.bind.input(i8* %dilate_erode.node, i32 8, i32 8, i1 false) ret %WrapperDilate.WrapperErode.ty undef } @@ -2103,9 +2103,9 @@ attributes #7 = { nounwind readnone } attributes #8 = { noreturn nounwind } attributes #9 = { nounwind readonly } -!visc_hint_gpu = !{!0, !1} -!visc_hint_cpu = !{!2, !3, !4} -!visc_hint_spir = !{} +!hpvm_hint_gpu = !{!0, !1} +!hpvm_hint_cpu = !{!2, !3, !4} +!hpvm_hint_spir = !{} !0 = metadata !{%emptyStruct (float*, i64, float*, i64, float*, i64, float*, i64, i32, i32, i32, i32)* @lincomb} !1 = metadata !{%dilate.erode.ty (float*, i64, float*, i64, float*, i64, i32, i32, float*, i64, float*, i64, float*, i64, i32, i32)* @dilate_erode} diff --git a/hpvm/test/pipeline/run.sh b/hpvm/test/pipeline/run.sh index 0c8435764bd87c92dd30ad51aa97011ddb07b339..5ac734026bf839c511dfdfb843b07382e6d8d4d6 100755 --- a/hpvm/test/pipeline/run.sh +++ b/hpvm/test/pipeline/run.sh @@ -4,7 +4,7 @@ echo Pipeline Script $1 $2 version=$1 pos=$2 -if [[ ($version == *"GPU"*) || ($version == "visc_parallel") ]] +if [[ ($version == 
*"GPU"*) || ($version == "hpvm_parallel") ]] then target="" elif [[ $version == *"Vector"* ]] diff --git a/hpvm/test/pipeline/runscript.sh b/hpvm/test/pipeline/runscript.sh index 5a2933e78801993ee440ead6e19f84aae66b3577..c95af8f831eeeb7f5f464e4acbc90dd49fcb67a1 100755 --- a/hpvm/test/pipeline/runscript.sh +++ b/hpvm/test/pipeline/runscript.sh @@ -2,21 +2,21 @@ echo Pipeline Script # Compile all version -make VERSION=viscGPU clean -make VERSION=viscVector TARGET=x86 clean -make VERSION=viscScalar TARGET=seq clean +make VERSION=hpvmGPU clean +make VERSION=hpvmVector TARGET=x86 clean +make VERSION=hpvmScalar TARGET=seq clean -make VERSION=viscGPU -make VERSION=viscVector TARGET=x86 -make VERSION=viscScalar TARGET=seq +make VERSION=hpvmGPU +make VERSION=hpvmVector TARGET=x86 +make VERSION=hpvmScalar TARGET=seq #Run all version -make VERSION=viscGPU run & +make VERSION=hpvmGPU run & ID_GPU=$! -make VERSION=viscVector TARGET=x86 run & +make VERSION=hpvmVector TARGET=x86 run & ID_Vector=$! -make VERSION=viscScalar TARGET=seq run +make VERSION=hpvmScalar TARGET=seq run ID_Scalar=$! #echo Wait 60 seconds diff --git a/hpvm/test/pipeline/src/Makefile b/hpvm/test/pipeline/src/Makefile index ec39b86f1cf71e2e8b6131b076c2953b566cbb56..55acb2e0982edc2a914340f2bfacbbfc1d06397f 100644 --- a/hpvm/test/pipeline/src/Makefile +++ b/hpvm/test/pipeline/src/Makefile @@ -1,8 +1,8 @@ # (c) 2010 The Board of Trustees of the University of Illinois. 
-LANGUAGE=visc +LANGUAGE=hpvm SRCDIR_OBJS=io.ll #compute_gold.o -VISC_OBJS=main.visc.ll +HPVM_OBJS=main.hpvm.ll APP_CUDALDFLAGS=-lm -lstdc++ APP_CFLAGS+=-ffast-math -O3 -I/opt/opencv/include APP_CXXFLAGS+=-ffast-math -O3 -I/opt/opencv/include diff --git a/hpvm/test/pipeline/src/main.cc b/hpvm/test/pipeline/src/main.cc index 9314833d25d0a3a25f13dfb24fb8a239b94956b1..ef9d8412c70813fcae123b0ef84de1850fa6b28c 100644 --- a/hpvm/test/pipeline/src/main.cc +++ b/hpvm/test/pipeline/src/main.cc @@ -13,6 +13,7 @@ #include "opencv2/ocl/ocl.hpp" #include "opencv2/opencv.hpp" #include <cassert> +#include <hpvm.h> #include <iostream> #include <malloc.h> #include <math.h> @@ -20,7 +21,6 @@ #include <stdlib.h> #include <string.h> #include <sys/time.h> -#include <visc.h> #define NUM_RUNS 100 #define DEPTH 3 @@ -147,12 +147,12 @@ void packData(struct InStruct *args, float *I, size_t bytesI, float *Is, void gaussianSmoothing(float *I, size_t bytesI, float *Gs, size_t bytesGs, float *Is, size_t bytesIs, long m, long n) { - __visc__hint(visc::DEVICE); - __visc__attributes(2, I, Gs, 1, Is); + __hpvm__hint(hpvm::DEVICE); + __hpvm__attributes(2, I, Gs, 1, Is); - void *thisNode = __visc__getNode(); - long gx = __visc__getNodeInstanceID_x(thisNode); - long gy = __visc__getNodeInstanceID_y(thisNode); + void *thisNode = __hpvm__getNode(); + long gx = __hpvm__getNodeInstanceID_x(thisNode); + long gy = __hpvm__getNodeInstanceID_y(thisNode); int gloc = gx + gy * n; @@ -187,26 +187,26 @@ void gaussianSmoothing(float *I, size_t bytesI, float *Gs, size_t bytesGs, Is[gloc] = smoothedVal; } - __visc__return(2, bytesIs, bytesIs); + __hpvm__return(2, bytesIs, bytesIs); } void WrapperGaussianSmoothing(float *I, size_t bytesI, float *Gs, size_t bytesGs, float *Is, size_t bytesIs, long m, long n) { - __visc__hint(visc::CPU_TARGET); - __visc__attributes(2, I, Gs, 1, Is); - void *GSNode = __visc__createNodeND(2, gaussianSmoothing, m, n); - __visc__bindIn(GSNode, 0, 0, 0); // Bind I - __visc__bindIn(GSNode, 
1, 1, 0); // Bind bytesI - __visc__bindIn(GSNode, 2, 2, 0); // Bind Gs - __visc__bindIn(GSNode, 3, 3, 0); // Bind bytesGs - __visc__bindIn(GSNode, 4, 4, 0); // Bind Is - __visc__bindIn(GSNode, 5, 5, 0); // Bind bytesIs - __visc__bindIn(GSNode, 6, 6, 0); // Bind m - __visc__bindIn(GSNode, 7, 7, 0); // Bind n - - __visc__bindOut(GSNode, 0, 0, 0); // bind output bytesIs - __visc__bindOut(GSNode, 1, 1, 0); // bind output bytesIs + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(2, I, Gs, 1, Is); + void *GSNode = __hpvm__createNodeND(2, gaussianSmoothing, m, n); + __hpvm__bindIn(GSNode, 0, 0, 0); // Bind I + __hpvm__bindIn(GSNode, 1, 1, 0); // Bind bytesI + __hpvm__bindIn(GSNode, 2, 2, 0); // Bind Gs + __hpvm__bindIn(GSNode, 3, 3, 0); // Bind bytesGs + __hpvm__bindIn(GSNode, 4, 4, 0); // Bind Is + __hpvm__bindIn(GSNode, 5, 5, 0); // Bind bytesIs + __hpvm__bindIn(GSNode, 6, 6, 0); // Bind m + __hpvm__bindIn(GSNode, 7, 7, 0); // Bind n + + __hpvm__bindOut(GSNode, 0, 0, 0); // bind output bytesIs + __hpvm__bindOut(GSNode, 1, 1, 0); // bind output bytesIs } /* Compute a non-linear laplacian estimate of input image I of size m x n */ @@ -220,14 +220,14 @@ void WrapperGaussianSmoothing(float *I, size_t bytesI, float *Gs, void laplacianEstimate(float *Is, size_t bytesIs, float *B, size_t bytesB, float *L, size_t bytesL, long m, long n) { - __visc__hint(visc::DEVICE); - __visc__attributes(2, Is, B, 1, L); + __hpvm__hint(hpvm::DEVICE); + __hpvm__attributes(2, Is, B, 1, L); // 3x3 image area float imageArea[SZB * SZB]; - void *thisNode = __visc__getNode(); - long gx = __visc__getNodeInstanceID_x(thisNode); - long gy = __visc__getNodeInstanceID_y(thisNode); + void *thisNode = __hpvm__getNode(); + long gx = __hpvm__getNodeInstanceID_x(thisNode); + long gy = __hpvm__getNodeInstanceID_y(thisNode); int i, j; if ((gx < n) && (gy < m)) { @@ -300,25 +300,25 @@ void laplacianEstimate(float *Is, size_t bytesIs, float *B, size_t bytesB, float laplacian = dilatedPixel + erodedPixel - 2 
* imageArea[1 * SZB + 1]; L[gy * n + gx] = laplacian; } - __visc__return(1, bytesL); + __hpvm__return(1, bytesL); } void WrapperlaplacianEstimate(float *Is, size_t bytesIs, float *B, size_t bytesB, float *L, size_t bytesL, long m, long n) { - __visc__hint(visc::CPU_TARGET); - __visc__attributes(2, Is, B, 1, L); - void *LNode = __visc__createNodeND(2, laplacianEstimate, m, n); - __visc__bindIn(LNode, 0, 0, 0); // Bind Is - __visc__bindIn(LNode, 1, 1, 0); // Bind bytesIs - __visc__bindIn(LNode, 2, 2, 0); // Bind B - __visc__bindIn(LNode, 3, 3, 0); // Bind bytesB - __visc__bindIn(LNode, 4, 4, 0); // Bind L - __visc__bindIn(LNode, 5, 5, 0); // Bind bytesL - __visc__bindIn(LNode, 6, 6, 0); // Bind m - __visc__bindIn(LNode, 7, 7, 0); // Bind n - - __visc__bindOut(LNode, 0, 0, 0); // bind output bytesL + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(2, Is, B, 1, L); + void *LNode = __hpvm__createNodeND(2, laplacianEstimate, m, n); + __hpvm__bindIn(LNode, 0, 0, 0); // Bind Is + __hpvm__bindIn(LNode, 1, 1, 0); // Bind bytesIs + __hpvm__bindIn(LNode, 2, 2, 0); // Bind B + __hpvm__bindIn(LNode, 3, 3, 0); // Bind bytesB + __hpvm__bindIn(LNode, 4, 4, 0); // Bind L + __hpvm__bindIn(LNode, 5, 5, 0); // Bind bytesL + __hpvm__bindIn(LNode, 6, 6, 0); // Bind m + __hpvm__bindIn(LNode, 7, 7, 0); // Bind n + + __hpvm__bindOut(LNode, 0, 0, 0); // bind output bytesL } /* Compute the zero crossings of input image L of size m x n */ @@ -331,16 +331,16 @@ void WrapperlaplacianEstimate(float *Is, size_t bytesIs, float *B, */ void computeZeroCrossings(float *L, size_t bytesL, float *B, size_t bytesB, float *S, size_t bytesS, long m, long n) { - __visc__hint(visc::DEVICE); - //__visc__hint(visc::CPU_TARGET); - __visc__attributes(2, L, B, 1, S); + __hpvm__hint(hpvm::DEVICE); + //__hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(2, L, B, 1, S); // 3x3 image area float imageArea[SZB][SZB]; - void *thisNode = __visc__getNode(); - long gx = __visc__getNodeInstanceID_x(thisNode); - long gy 
= __visc__getNodeInstanceID_y(thisNode); + void *thisNode = __hpvm__getNode(); + long gx = __hpvm__getNodeInstanceID_x(thisNode); + long gy = __hpvm__getNodeInstanceID_y(thisNode); int i, j; if ((gx < n) && (gy < m)) { @@ -416,25 +416,25 @@ void computeZeroCrossings(float *L, size_t bytesL, float *B, size_t bytesB, float pixelSign = dilatedPixel - erodedPixel; S[gy * n + gx] = pixelSign; } - __visc__return(1, bytesS); + __hpvm__return(1, bytesS); } void WrapperComputeZeroCrossings(float *L, size_t bytesL, float *B, size_t bytesB, float *S, size_t bytesS, long m, long n) { - __visc__hint(visc::CPU_TARGET); - __visc__attributes(2, L, B, 1, S); - void *ZCNode = __visc__createNodeND(2, computeZeroCrossings, m, n); - __visc__bindIn(ZCNode, 0, 0, 0); // Bind L - __visc__bindIn(ZCNode, 1, 1, 0); // Bind bytesL - __visc__bindIn(ZCNode, 2, 2, 0); // Bind B - __visc__bindIn(ZCNode, 3, 3, 0); // Bind bytesB - __visc__bindIn(ZCNode, 4, 4, 0); // Bind S - __visc__bindIn(ZCNode, 5, 5, 0); // Bind bytesS - __visc__bindIn(ZCNode, 6, 6, 0); // Bind m - __visc__bindIn(ZCNode, 7, 7, 0); // Bind n - - __visc__bindOut(ZCNode, 0, 0, 0); // bind output bytesS + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(2, L, B, 1, S); + void *ZCNode = __hpvm__createNodeND(2, computeZeroCrossings, m, n); + __hpvm__bindIn(ZCNode, 0, 0, 0); // Bind L + __hpvm__bindIn(ZCNode, 1, 1, 0); // Bind bytesL + __hpvm__bindIn(ZCNode, 2, 2, 0); // Bind B + __hpvm__bindIn(ZCNode, 3, 3, 0); // Bind bytesB + __hpvm__bindIn(ZCNode, 4, 4, 0); // Bind S + __hpvm__bindIn(ZCNode, 5, 5, 0); // Bind bytesS + __hpvm__bindIn(ZCNode, 6, 6, 0); // Bind m + __hpvm__bindIn(ZCNode, 7, 7, 0); // Bind n + + __hpvm__bindOut(ZCNode, 0, 0, 0); // bind output bytesS } /* @@ -458,12 +458,12 @@ void computeGradient(float *Is, size_t bytesIs, float *Sx, size_t bytesSx, float *Sy, size_t bytesSy, float *G, size_t bytesG, long m, long n) { - __visc__hint(visc::DEVICE); - __visc__attributes(3, Is, Sx, Sy, 1, G); + 
__hpvm__hint(hpvm::DEVICE); + __hpvm__attributes(3, Is, Sx, Sy, 1, G); - void *thisNode = __visc__getNode(); - long gx = __visc__getNodeInstanceID_x(thisNode); - long gy = __visc__getNodeInstanceID_y(thisNode); + void *thisNode = __hpvm__getNode(); + long gx = __hpvm__getNodeInstanceID_x(thisNode); + long gy = __hpvm__getNodeInstanceID_y(thisNode); int gloc = gx + gy * n; @@ -498,27 +498,27 @@ void computeGradient(float *Is, size_t bytesIs, float *Sx, size_t bytesSx, G[gloc] = sqrt(Gx * Gx + Gy * Gy); } - __visc__return(1, bytesG); + __hpvm__return(1, bytesG); } void WrapperComputeGradient(float *Is, size_t bytesIs, float *Sx, size_t bytesSx, float *Sy, size_t bytesSy, float *G, size_t bytesG, long m, long n) { - __visc__hint(visc::CPU_TARGET); - __visc__attributes(3, Is, Sx, Sy, 1, G); - void *CGNode = __visc__createNodeND(2, computeGradient, m, n); - __visc__bindIn(CGNode, 0, 0, 0); // Bind Is - __visc__bindIn(CGNode, 1, 1, 0); // Bind bytesIs - __visc__bindIn(CGNode, 2, 2, 0); // Bind Sx - __visc__bindIn(CGNode, 3, 3, 0); // Bind bytesSx - __visc__bindIn(CGNode, 4, 4, 0); // Bind Sy - __visc__bindIn(CGNode, 5, 5, 0); // Bind bytesSy - __visc__bindIn(CGNode, 6, 6, 0); // Bind G - __visc__bindIn(CGNode, 7, 7, 0); // Bind bytesG - __visc__bindIn(CGNode, 8, 8, 0); // Bind m - __visc__bindIn(CGNode, 9, 9, 0); // Bind n - - __visc__bindOut(CGNode, 0, 0, 0); // bind output bytesG + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(3, Is, Sx, Sy, 1, G); + void *CGNode = __hpvm__createNodeND(2, computeGradient, m, n); + __hpvm__bindIn(CGNode, 0, 0, 0); // Bind Is + __hpvm__bindIn(CGNode, 1, 1, 0); // Bind bytesIs + __hpvm__bindIn(CGNode, 2, 2, 0); // Bind Sx + __hpvm__bindIn(CGNode, 3, 3, 0); // Bind bytesSx + __hpvm__bindIn(CGNode, 4, 4, 0); // Bind Sy + __hpvm__bindIn(CGNode, 5, 5, 0); // Bind bytesSy + __hpvm__bindIn(CGNode, 6, 6, 0); // Bind G + __hpvm__bindIn(CGNode, 7, 7, 0); // Bind bytesG + __hpvm__bindIn(CGNode, 8, 8, 0); // Bind m + __hpvm__bindIn(CGNode, 9, 
9, 0); // Bind n + + __hpvm__bindOut(CGNode, 0, 0, 0); // bind output bytesG } /* @@ -531,13 +531,13 @@ void WrapperComputeGradient(float *Is, size_t bytesIs, float *Sx, void computeMaxGradientLeaf(float *G, size_t bytesG, float *maxG, size_t bytesMaxG, long m, long n) { - __visc__hint(visc::CPU_TARGET); - __visc__attributes(1, G, 1, maxG); + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(1, G, 1, maxG); - void *thisNode = __visc__getNode(); + void *thisNode = __hpvm__getNode(); - long lx = __visc__getNodeInstanceID_x(thisNode); // threadIdx.x - long dimx = __visc__getNumNodeInstances_x(thisNode); // blockDim.x + long lx = __hpvm__getNodeInstanceID_x(thisNode); // threadIdx.x + long dimx = __hpvm__getNumNodeInstances_x(thisNode); // blockDim.x // Assume a single thread block // Thread block iterates over all elements @@ -556,39 +556,39 @@ void computeMaxGradientLeaf(float *G, size_t bytesG, float *maxG, *maxG = G[lx]; } - __visc__return(1, bytesMaxG); + __hpvm__return(1, bytesMaxG); } void computeMaxGradientTB(float *G, size_t bytesG, float *maxG, size_t bytesMaxG, long m, long n, long block_x) { - __visc__hint(visc::CPU_TARGET); - __visc__attributes(2, G, maxG, 1, maxG); - void *CMGLeafNode = __visc__createNodeND(1, computeMaxGradientLeaf, block_x); - __visc__bindIn(CMGLeafNode, 0, 0, 0); // Bind G - __visc__bindIn(CMGLeafNode, 1, 1, 0); // Bind bytesG - __visc__bindIn(CMGLeafNode, 2, 2, 0); // Bind maxG - __visc__bindIn(CMGLeafNode, 3, 3, 0); // Bind bytesMaxG - __visc__bindIn(CMGLeafNode, 4, 4, 0); // Bind m - __visc__bindIn(CMGLeafNode, 5, 5, 0); // Bind n - - __visc__bindOut(CMGLeafNode, 0, 0, 0); // bind output bytesMaxG + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(2, G, maxG, 1, maxG); + void *CMGLeafNode = __hpvm__createNodeND(1, computeMaxGradientLeaf, block_x); + __hpvm__bindIn(CMGLeafNode, 0, 0, 0); // Bind G + __hpvm__bindIn(CMGLeafNode, 1, 1, 0); // Bind bytesG + __hpvm__bindIn(CMGLeafNode, 2, 2, 0); // Bind maxG + 
__hpvm__bindIn(CMGLeafNode, 3, 3, 0); // Bind bytesMaxG + __hpvm__bindIn(CMGLeafNode, 4, 4, 0); // Bind m + __hpvm__bindIn(CMGLeafNode, 5, 5, 0); // Bind n + + __hpvm__bindOut(CMGLeafNode, 0, 0, 0); // bind output bytesMaxG } void WrapperComputeMaxGradient(float *G, size_t bytesG, float *maxG, size_t bytesMaxG, long m, long n, long block_x, long grid_x) { - __visc__hint(visc::CPU_TARGET); - __visc__attributes(2, G, maxG, 1, maxG); - void *CMGTBNode = __visc__createNodeND(1, computeMaxGradientTB, grid_x); - __visc__bindIn(CMGTBNode, 0, 0, 0); // Bind G - __visc__bindIn(CMGTBNode, 1, 1, 0); // Bind bytesG - __visc__bindIn(CMGTBNode, 2, 2, 0); // Bind maxG - __visc__bindIn(CMGTBNode, 3, 3, 0); // Bind bytesMaxG - __visc__bindIn(CMGTBNode, 4, 4, 0); // Bind m - __visc__bindIn(CMGTBNode, 5, 5, 0); // Bind n - __visc__bindIn(CMGTBNode, 6, 6, 0); // Bind block_x - - __visc__bindOut(CMGTBNode, 0, 0, 0); // bind output bytesMaxG + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(2, G, maxG, 1, maxG); + void *CMGTBNode = __hpvm__createNodeND(1, computeMaxGradientTB, grid_x); + __hpvm__bindIn(CMGTBNode, 0, 0, 0); // Bind G + __hpvm__bindIn(CMGTBNode, 1, 1, 0); // Bind bytesG + __hpvm__bindIn(CMGTBNode, 2, 2, 0); // Bind maxG + __hpvm__bindIn(CMGTBNode, 3, 3, 0); // Bind bytesMaxG + __hpvm__bindIn(CMGTBNode, 4, 4, 0); // Bind m + __hpvm__bindIn(CMGTBNode, 5, 5, 0); // Bind n + __hpvm__bindIn(CMGTBNode, 6, 6, 0); // Bind block_x + + __hpvm__bindOut(CMGTBNode, 0, 0, 0); // bind output bytesMaxG } /* Reject the zero crossings where the gradient is below a threshold */ @@ -604,39 +604,39 @@ void WrapperComputeMaxGradient(float *G, size_t bytesG, float *maxG, void rejectZeroCrossings(float *S, size_t bytesS, float *G, size_t bytesG, float *maxG, size_t bytesMaxG, float *E, size_t bytesE, long m, long n) { - __visc__hint(visc::DEVICE); - __visc__attributes(3, S, G, maxG, 1, E); + __hpvm__hint(hpvm::DEVICE); + __hpvm__attributes(3, S, G, maxG, 1, E); - void *thisNode = 
__visc__getNode(); - int gx = __visc__getNodeInstanceID_x(thisNode); - int gy = __visc__getNodeInstanceID_y(thisNode); + void *thisNode = __hpvm__getNode(); + int gx = __hpvm__getNodeInstanceID_x(thisNode); + int gy = __hpvm__getNodeInstanceID_y(thisNode); float mG = *maxG; if ((gx < n) && (gy < m)) { E[gy * n + gx] = ((S[gy * n + gx] > 0.0) && (G[gy * n + gx] > THETA * mG)) ? 1.0 : 0.0; } - __visc__return(1, bytesE); + __hpvm__return(1, bytesE); } void WrapperRejectZeroCrossings(float *S, size_t bytesS, float *G, size_t bytesG, float *maxG, size_t bytesMaxG, float *E, size_t bytesE, long m, long n) { - __visc__hint(visc::CPU_TARGET); - __visc__attributes(3, S, G, maxG, 1, E); - void *RZCNode = __visc__createNodeND(2, rejectZeroCrossings, m, n); - __visc__bindIn(RZCNode, 0, 0, 0); // Bind S - __visc__bindIn(RZCNode, 1, 1, 0); // Bind bytesS - __visc__bindIn(RZCNode, 2, 2, 0); // Bind G - __visc__bindIn(RZCNode, 3, 3, 0); // Bind bytesG - __visc__bindIn(RZCNode, 4, 4, 0); // Bind maxG - __visc__bindIn(RZCNode, 5, 5, 0); // Bind bytesMaxG - __visc__bindIn(RZCNode, 6, 6, 0); // Bind E - __visc__bindIn(RZCNode, 7, 7, 0); // Bind bytesE - __visc__bindIn(RZCNode, 8, 8, 0); // Bind m - __visc__bindIn(RZCNode, 9, 9, 0); // Bind n - - __visc__bindOut(RZCNode, 0, 0, 0); // bind output bytesE + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(3, S, G, maxG, 1, E); + void *RZCNode = __hpvm__createNodeND(2, rejectZeroCrossings, m, n); + __hpvm__bindIn(RZCNode, 0, 0, 0); // Bind S + __hpvm__bindIn(RZCNode, 1, 1, 0); // Bind bytesS + __hpvm__bindIn(RZCNode, 2, 2, 0); // Bind G + __hpvm__bindIn(RZCNode, 3, 3, 0); // Bind bytesG + __hpvm__bindIn(RZCNode, 4, 4, 0); // Bind maxG + __hpvm__bindIn(RZCNode, 5, 5, 0); // Bind bytesMaxG + __hpvm__bindIn(RZCNode, 6, 6, 0); // Bind E + __hpvm__bindIn(RZCNode, 7, 7, 0); // Bind bytesE + __hpvm__bindIn(RZCNode, 8, 8, 0); // Bind m + __hpvm__bindIn(RZCNode, 9, 9, 0); // Bind n + + __hpvm__bindOut(RZCNode, 0, 0, 0); // bind output bytesE } 
// Pipelined Root node @@ -656,80 +656,80 @@ void edgeDetection(float *I, size_t bytesI, // 0 long block_x, // 24 long grid_x // 25 ) { - __visc__attributes(5, I, Gs, B, Sx, Sy, 6, Is, L, S, G, maxG, E); - __visc__hint(visc::CPU_TARGET); - void *GSNode = __visc__createNodeND(0, WrapperGaussianSmoothing); - void *LNode = __visc__createNodeND(0, WrapperlaplacianEstimate); - void *CZCNode = __visc__createNodeND(0, WrapperComputeZeroCrossings); - void *CGNode = __visc__createNodeND(0, WrapperComputeGradient); - void *CMGNode = __visc__createNodeND(0, WrapperComputeMaxGradient); - void *RZCNode = __visc__createNodeND(0, WrapperRejectZeroCrossings); + __hpvm__attributes(5, I, Gs, B, Sx, Sy, 6, Is, L, S, G, maxG, E); + __hpvm__hint(hpvm::CPU_TARGET); + void *GSNode = __hpvm__createNodeND(0, WrapperGaussianSmoothing); + void *LNode = __hpvm__createNodeND(0, WrapperlaplacianEstimate); + void *CZCNode = __hpvm__createNodeND(0, WrapperComputeZeroCrossings); + void *CGNode = __hpvm__createNodeND(0, WrapperComputeGradient); + void *CMGNode = __hpvm__createNodeND(0, WrapperComputeMaxGradient); + void *RZCNode = __hpvm__createNodeND(0, WrapperRejectZeroCrossings); // Gaussian Inputs - __visc__bindIn(GSNode, 0, 0, 1); // Bind I - __visc__bindIn(GSNode, 1, 1, 1); // Bind bytesI - __visc__bindIn(GSNode, 14, 2, 1); // Bind Gs - __visc__bindIn(GSNode, 15, 3, 1); // Bind bytesGs - __visc__bindIn(GSNode, 2, 4, 1); // Bind Is - __visc__bindIn(GSNode, 3, 5, 1); // Bind bytesIs - __visc__bindIn(GSNode, 22, 6, 1); // Bind m - __visc__bindIn(GSNode, 23, 7, 1); // Bind n + __hpvm__bindIn(GSNode, 0, 0, 1); // Bind I + __hpvm__bindIn(GSNode, 1, 1, 1); // Bind bytesI + __hpvm__bindIn(GSNode, 14, 2, 1); // Bind Gs + __hpvm__bindIn(GSNode, 15, 3, 1); // Bind bytesGs + __hpvm__bindIn(GSNode, 2, 4, 1); // Bind Is + __hpvm__bindIn(GSNode, 3, 5, 1); // Bind bytesIs + __hpvm__bindIn(GSNode, 22, 6, 1); // Bind m + __hpvm__bindIn(GSNode, 23, 7, 1); // Bind n // Laplacian Inputs - __visc__bindIn(LNode, 2, 
0, 1); // Bind Is - __visc__edge(GSNode, LNode, 1, 0, 1, 1); // Get bytesIs - __visc__bindIn(LNode, 16, 2, 1); // Bind B - __visc__bindIn(LNode, 17, 3, 1); // Bind bytesB - __visc__bindIn(LNode, 4, 4, 1); // Bind L - __visc__bindIn(LNode, 5, 5, 1); // Bind bytesL - __visc__bindIn(LNode, 22, 6, 1); // Bind m - __visc__bindIn(LNode, 23, 7, 1); // Bind n + __hpvm__bindIn(LNode, 2, 0, 1); // Bind Is + __hpvm__edge(GSNode, LNode, 1, 0, 1, 1); // Get bytesIs + __hpvm__bindIn(LNode, 16, 2, 1); // Bind B + __hpvm__bindIn(LNode, 17, 3, 1); // Bind bytesB + __hpvm__bindIn(LNode, 4, 4, 1); // Bind L + __hpvm__bindIn(LNode, 5, 5, 1); // Bind bytesL + __hpvm__bindIn(LNode, 22, 6, 1); // Bind m + __hpvm__bindIn(LNode, 23, 7, 1); // Bind n // Compute ZC Inputs - __visc__bindIn(CZCNode, 4, 0, 1); // Bind L - __visc__edge(LNode, CZCNode, 1, 0, 1, 1); // Get bytesL - __visc__bindIn(CZCNode, 16, 2, 1); // Bind B - __visc__bindIn(CZCNode, 17, 3, 1); // Bind bytesB - __visc__bindIn(CZCNode, 6, 4, 1); // Bind S - __visc__bindIn(CZCNode, 7, 5, 1); // Bind bytesS - __visc__bindIn(CZCNode, 22, 6, 1); // Bind m - __visc__bindIn(CZCNode, 23, 7, 1); // Bind n + __hpvm__bindIn(CZCNode, 4, 0, 1); // Bind L + __hpvm__edge(LNode, CZCNode, 1, 0, 1, 1); // Get bytesL + __hpvm__bindIn(CZCNode, 16, 2, 1); // Bind B + __hpvm__bindIn(CZCNode, 17, 3, 1); // Bind bytesB + __hpvm__bindIn(CZCNode, 6, 4, 1); // Bind S + __hpvm__bindIn(CZCNode, 7, 5, 1); // Bind bytesS + __hpvm__bindIn(CZCNode, 22, 6, 1); // Bind m + __hpvm__bindIn(CZCNode, 23, 7, 1); // Bind n // Gradient Inputs - __visc__bindIn(CGNode, 2, 0, 1); // Bind Is - __visc__edge(GSNode, CGNode, 1, 1, 1, 1); // Get bytesIs - __visc__bindIn(CGNode, 18, 2, 1); // Bind Sx - __visc__bindIn(CGNode, 19, 3, 1); // Bind bytesSx - __visc__bindIn(CGNode, 20, 4, 1); // Bind Sy - __visc__bindIn(CGNode, 21, 5, 1); // Bind bytesSy - __visc__bindIn(CGNode, 8, 6, 1); // Bind G - __visc__bindIn(CGNode, 9, 7, 1); // Bind bytesG - __visc__bindIn(CGNode, 22, 8, 1); // 
Bind m - __visc__bindIn(CGNode, 23, 9, 1); // Bind n + __hpvm__bindIn(CGNode, 2, 0, 1); // Bind Is + __hpvm__edge(GSNode, CGNode, 1, 1, 1, 1); // Get bytesIs + __hpvm__bindIn(CGNode, 18, 2, 1); // Bind Sx + __hpvm__bindIn(CGNode, 19, 3, 1); // Bind bytesSx + __hpvm__bindIn(CGNode, 20, 4, 1); // Bind Sy + __hpvm__bindIn(CGNode, 21, 5, 1); // Bind bytesSy + __hpvm__bindIn(CGNode, 8, 6, 1); // Bind G + __hpvm__bindIn(CGNode, 9, 7, 1); // Bind bytesG + __hpvm__bindIn(CGNode, 22, 8, 1); // Bind m + __hpvm__bindIn(CGNode, 23, 9, 1); // Bind n // Max Gradient Inputs - __visc__bindIn(CMGNode, 8, 0, 1); // Bind G - __visc__edge(CGNode, CMGNode, 1, 0, 1, 1); // Get bytesG - __visc__bindIn(CMGNode, 10, 2, 1); // Bind maxG - __visc__bindIn(CMGNode, 11, 3, 1); // Bind bytesMaxG - __visc__bindIn(CMGNode, 22, 4, 1); // Bind m - __visc__bindIn(CMGNode, 23, 5, 1); // Bind n - __visc__bindIn(CMGNode, 24, 6, 1); // Bind block_x - __visc__bindIn(CMGNode, 25, 7, 1); // Bind grid_x + __hpvm__bindIn(CMGNode, 8, 0, 1); // Bind G + __hpvm__edge(CGNode, CMGNode, 1, 0, 1, 1); // Get bytesG + __hpvm__bindIn(CMGNode, 10, 2, 1); // Bind maxG + __hpvm__bindIn(CMGNode, 11, 3, 1); // Bind bytesMaxG + __hpvm__bindIn(CMGNode, 22, 4, 1); // Bind m + __hpvm__bindIn(CMGNode, 23, 5, 1); // Bind n + __hpvm__bindIn(CMGNode, 24, 6, 1); // Bind block_x + __hpvm__bindIn(CMGNode, 25, 7, 1); // Bind grid_x // Reject ZC Inputs - __visc__bindIn(RZCNode, 6, 0, 1); // Bind S - __visc__edge(CZCNode, RZCNode, 1, 0, 1, 1); // Get bytesS - __visc__bindIn(RZCNode, 8, 2, 1); // Bind G - __visc__bindIn(RZCNode, 9, 3, 1); // Bind bytesG - __visc__bindIn(RZCNode, 10, 4, 1); // Bind maxG - __visc__edge(CMGNode, RZCNode, 1, 0, 5, 1); // Get bytesMaxG - __visc__bindIn(RZCNode, 12, 6, 1); // Bind E - __visc__bindIn(RZCNode, 13, 7, 1); // Bind bytesE - __visc__bindIn(RZCNode, 22, 8, 1); // Bind m - __visc__bindIn(RZCNode, 23, 9, 1); // Bind n - - __visc__bindOut(RZCNode, 0, 0, 1); // Bind output + __hpvm__bindIn(RZCNode, 6, 0, 
1); // Bind S + __hpvm__edge(CZCNode, RZCNode, 1, 0, 1, 1); // Get bytesS + __hpvm__bindIn(RZCNode, 8, 2, 1); // Bind G + __hpvm__bindIn(RZCNode, 9, 3, 1); // Bind bytesG + __hpvm__bindIn(RZCNode, 10, 4, 1); // Bind maxG + __hpvm__edge(CMGNode, RZCNode, 1, 0, 5, 1); // Get bytesMaxG + __hpvm__bindIn(RZCNode, 12, 6, 1); // Bind E + __hpvm__bindIn(RZCNode, 13, 7, 1); // Bind bytesE + __hpvm__bindIn(RZCNode, 22, 8, 1); // Bind m + __hpvm__bindIn(RZCNode, 23, 9, 1); // Bind n + + __hpvm__bindOut(RZCNode, 0, 0, 1); // Bind output } } @@ -796,7 +796,7 @@ int main(int argc, char *argv[]) { assert(src.isContinuous() && Is.isContinuous() && L.isContinuous() && S.isContinuous() && G.isContinuous() && E.isContinuous()); - __visc__init(); + __hpvm__init(); // copy A to device memory I_sz = src.size[0] * src.size[1] * sizeof(float); @@ -843,7 +843,7 @@ int main(int argc, char *argv[]) { for (unsigned j = 0; j < NUM_RUNS; j++) { std::cout << "Run: " << j << "\n"; - void *DFG = __visc__launch(1, edgeDetection, (void *)args); + void *DFG = __hpvm__launch(1, edgeDetection, (void *)args); cap = VideoCapture(inFile); getNextFrame(cap, src); @@ -855,25 +855,25 @@ int main(int argc, char *argv[]) { *maxG = 0.0; - llvm_visc_track_mem(src.data, I_sz); - llvm_visc_track_mem(Is.data, I_sz); - llvm_visc_track_mem(L.data, I_sz); - llvm_visc_track_mem(S.data, I_sz); - llvm_visc_track_mem(G.data, I_sz); - llvm_visc_track_mem(maxG, bytesMaxG); - llvm_visc_track_mem(E.data, I_sz); - llvm_visc_track_mem(Gs, bytesGs); - llvm_visc_track_mem(B, bytesB); - llvm_visc_track_mem(Sx, bytesSx); - llvm_visc_track_mem(Sy, bytesSy); - - __visc__push(DFG, args); - void *ret = __visc__pop(DFG); + llvm_hpvm_track_mem(src.data, I_sz); + llvm_hpvm_track_mem(Is.data, I_sz); + llvm_hpvm_track_mem(L.data, I_sz); + llvm_hpvm_track_mem(S.data, I_sz); + llvm_hpvm_track_mem(G.data, I_sz); + llvm_hpvm_track_mem(maxG, bytesMaxG); + llvm_hpvm_track_mem(E.data, I_sz); + llvm_hpvm_track_mem(Gs, bytesGs); + 
llvm_hpvm_track_mem(B, bytesB); + llvm_hpvm_track_mem(Sx, bytesSx); + llvm_hpvm_track_mem(Sy, bytesSy); + + __hpvm__push(DFG, args); + void *ret = __hpvm__pop(DFG); std::cout << "Returned size: " << *(size_t *)ret << " expected " << I_sz << '\n'; - llvm_visc_request_mem(maxG, bytesMaxG); - llvm_visc_request_mem(E.data, I_sz); + llvm_hpvm_request_mem(maxG, bytesMaxG); + llvm_hpvm_request_mem(E.data, I_sz); Mat in, out; resize(src, in, Size(HEIGHT, WIDTH)); @@ -882,26 +882,26 @@ int main(int argc, char *argv[]) { imshow(input_window, in); waitKey(1); - llvm_visc_untrack_mem(src.data); - llvm_visc_untrack_mem(Is.data); - llvm_visc_untrack_mem(L.data); - llvm_visc_untrack_mem(S.data); - llvm_visc_untrack_mem(G.data); - llvm_visc_untrack_mem(maxG); - llvm_visc_untrack_mem(E.data); - llvm_visc_untrack_mem(Gs); - llvm_visc_untrack_mem(B); - llvm_visc_untrack_mem(Sx); - llvm_visc_untrack_mem(Sy); + llvm_hpvm_untrack_mem(src.data); + llvm_hpvm_untrack_mem(Is.data); + llvm_hpvm_untrack_mem(L.data); + llvm_hpvm_untrack_mem(S.data); + llvm_hpvm_untrack_mem(G.data); + llvm_hpvm_untrack_mem(maxG); + llvm_hpvm_untrack_mem(E.data); + llvm_hpvm_untrack_mem(Gs); + llvm_hpvm_untrack_mem(B); + llvm_hpvm_untrack_mem(Sx); + llvm_hpvm_untrack_mem(Sy); getNextFrame(cap, src); } } else { - __visc__push(DFG, args); - __visc__pop(DFG); + __hpvm__push(DFG, args); + __hpvm__pop(DFG); } - __visc__wait(DFG); + __hpvm__wait(DFG); } - __visc__cleanup(); + __hpvm__cleanup(); return 0; } diff --git a/hpvm/test/template/Makefile b/hpvm/test/template/Makefile index 3aa4bd1d6f2c3f7bb2be07ba5e662c5b6faf1655..d3344887e1cb516b09c4eb92bcad8f9b646d94a3 100644 --- a/hpvm/test/template/Makefile +++ b/hpvm/test/template/Makefile @@ -25,12 +25,12 @@ CURRENT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) EXE = $(EXE_NAME)-$(TARGET) INCLUDES += -I$(SRC_DIR) -INCLUDES += -I$(LLVM_SRC_ROOT)/include -I../include -I$(VISC_BUILD_DIR)/include +INCLUDES += -I$(LLVM_SRC_ROOT)/include -I../include 
-I$(HPVM_BUILD_DIR)/include ## BEGIN HPVM MAKEFILE SRCDIR_OBJS= io.ll OBJS_SRC=src/io.cc -VISC_OBJS=main.visc.ll +HPVM_OBJS=main.hpvm.ll APP = $(EXE) APP_CFLAGS += $(INCLUDES) -ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize APP_CXXFLAGS += $(INCLUDES) -ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize @@ -41,21 +41,21 @@ OBJS_CFLAGS = $(APP_CFLAGS) $(PLATFORM_CFLAGS) CXXFLAGS = $(APP_CXXFLAGS) $(PLATFORM_CXXFLAGS) LDFLAGS= $(APP_LDFLAGS) $(PLATFORM_LDFLAGS) -VISC_RT_PATH = $(LLVM_BUILD_ROOT)/tools/hpvm/projects/visc-rt -VISC_RT_LIB = $(VISC_RT_PATH)/visc-rt.bc +HPVM_RT_PATH = $(LLVM_BUILD_ROOT)/tools/hpvm/projects/hpvm-rt +HPVM_RT_LIB = $(HPVM_RT_PATH)/hpvm-rt.bc -TESTGEN_OPTFLAGS = -load LLVMGenVISC.so -genvisc -globaldce +TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce ifeq ($(TARGET),seq) DEVICE = CPU_TARGET - VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG - VISC_OPTFLAGS += -visc-timers-x86 + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG + HPVM_OPTFLAGS += -hpvm-timers-x86 else DEVICE = GPU_TARGET - VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG - VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-ptx + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG + HPVM_OPTFLAGS += -hpvm-timers-x86 -hpvm-timers-ptx endif - TESTGEN_OPTFLAGS += -visc-timers-gen + TESTGEN_OPTFLAGS += -hpvm-timers-gen CFLAGS += -DDEVICE=$(DEVICE) CXXFLAGS += -DDEVICE=$(DEVICE) @@ -66,7 +66,7 @@ INBUILDDIR=$(addprefix $(BUILD_DIR)/,$(1)) .PRECIOUS: $(BUILD_DIR)/%.ll OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS)) -TEST_OBJS = $(call 
INBUILDDIR,$(VISC_OBJS)) +TEST_OBJS = $(call INBUILDDIR,$(HPVM_OBJS)) KERNEL = $(TEST_OBJS).kernels.ll ifeq ($(TARGET),gpu) @@ -91,11 +91,11 @@ $(KERNEL_OCL) : $(KERNEL) $(EXE) : $(HOST_LINKED) $(CXX) -O3 $(LDFLAGS) $< -o $@ -$(HOST_LINKED) : $(HOST) $(OBJS) $(VISC_RT_LIB) +$(HOST_LINKED) : $(HOST) $(OBJS) $(HPVM_RT_LIB) $(LLVM_LINK) $^ -S -o $@ -$(HOST) $(KERNEL): $(BUILD_DIR)/$(VISC_OBJS) - $(OPT) $(VISC_OPTFLAGS) -S $< -o $(HOST) +$(HOST) $(KERNEL): $(BUILD_DIR)/$(HPVM_OBJS) + $(OPT) $(HPVM_OPTFLAGS) -S $< -o $(HOST) $(BUILD_DIR): mkdir -p $(BUILD_DIR) @@ -106,7 +106,7 @@ $(BUILD_DIR)/%.ll : $(SRC_DIR)/%.cc $(BUILD_DIR)/main.ll : $(SRC_DIR)/main.cc $(CC) $(CXXFLAGS) -emit-llvm -S -o $@ $< -$(BUILD_DIR)/main.visc.ll : $(BUILD_DIR)/main.ll +$(BUILD_DIR)/main.hpvm.ll : $(BUILD_DIR)/main.ll $(OPT) $(TESTGEN_OPTFLAGS) $< -S -o $@ ## END HPVM MAKEFILE diff --git a/hpvm/test/template/README.md b/hpvm/test/template/README.md index 198604817d5a8463e555451a1188b426ec4e31cd..ca51cbb90018e316f7a7f775223c0741b7328841 100644 --- a/hpvm/test/template/README.md +++ b/hpvm/test/template/README.md @@ -6,19 +6,19 @@ Let's look at the compilation of the `pipeline` test for gpu as an example. /.../hpvm/build/bin/clang -Isrc/ -I -I/.../hpvm/llvm/include -I../include -I/.../hpvm/build/include -ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize -I/.../hpvm/llvm/include -I../include -I/.../hpvm/build/include -DDEVICE=GPU_TARGET -emit-llvm -S -o build/main.ll src/main.cc ``` -`opt` is used to invoke the GenVISC pass, which converts the HPVM function calls to LLVM intrinsics. +`opt` is used to invoke the GenHPVM pass, which converts the HPVM function calls to LLVM intrinsics. 
``` -/.../hpvm/build/bin/opt -debug-only=genvisc -load LLVMGenVISC.so -genvisc -globaldce -visc-timers-gen build/main.ll -S -o build/main.visc.ll +/.../hpvm/build/bin/opt -debug-only=genhpvm -load LLVMGenHPVM.so -genhpvm -globaldce -hpvm-timers-gen build/main.ll -S -o build/main.hpvm.ll ``` `opt` is used again to invoke the BuildDFG pass, which converts the textual representation to the internal HPVM representation. ``` -/.../hpvm/build/bin/opt -debug -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG -visc-timers-x86 -visc-timers-ptx -S build/main.visc.ll -o build/pipeline-gpu.host.ll +/.../hpvm/build/bin/opt -debug -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG -hpvm-timers-x86 -hpvm-timers-ptx -S build/main.hpvm.ll -o build/pipeline-gpu.host.ll ``` `llvm-cbe` is a C backend for LLVM. It is used here to create the OpenCL kernel. ``` -/.../hpvm/build/bin/llvm-cbe -debug build/gpu/main.visc.ll.kernels.ll -o build/gpu/main.visc.ll.kernels.cl +/.../hpvm/build/bin/llvm-cbe -debug build/gpu/main.hpvm.ll.kernels.ll -o build/gpu/main.hpvm.ll.kernels.cl ``` `clang` is used again to compile a separate source file that contains I/O code. @@ -26,9 +26,9 @@ Let's look at the compilation of the `pipeline` test for gpu as an example. /.../hpvm/build/bin/clang -Isrc/ -I -I/.../hpvm/llvm/include -I../include -I/.../hpvm/build/include -ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize -I/.../hpvm/llvm/include -I../include -I/.../hpvm/build/include -emit-llvm -S -o build/gpu/io.ll src/io.cc ``` -`llvm-link` is used to link against the VISC runtime. +`llvm-link` is used to link against the HPVM runtime. 
``` -/.../hpvm/build/bin/llvm-link build/gpu/pipeline-gpu.host.ll build/gpu/io.ll /.../hpvm/llvm/tools/hpvm/projects/visc-rt/visc-rt.ll -S -o build/gpu/pipeline-gpu.linked.ll +/.../hpvm/build/bin/llvm-link build/gpu/pipeline-gpu.host.ll build/gpu/io.ll /.../hpvm/llvm/tools/hpvm/projects/hpvm-rt/hpvm-rt.ll -S -o build/gpu/pipeline-gpu.linked.ll ``` `clang++` is used to do the final linking against OpenCL and emit the binary. diff --git a/hpvm/test/unitTests/CreateNodeAndEdge.c b/hpvm/test/unitTests/CreateNodeAndEdge.c index 1b6b1cff211d5af5a909065af988aadbe979f2ec..c3f58c95d631b5c49a47de1cbe41ed5ea871f5f4 100644 --- a/hpvm/test/unitTests/CreateNodeAndEdge.c +++ b/hpvm/test/unitTests/CreateNodeAndEdge.c @@ -1,4 +1,4 @@ -#include "visc.h" +#include "hpvm.h" #include <stdio.h> struct Root { @@ -7,33 +7,33 @@ struct Root { }; void Func1(int *In, int *Out) { - __visc__hint(CPU_TARGET); - __visc__attributes(1, In, 1, Out); + __hpvm__hint(CPU_TARGET); + __hpvm__attributes(1, In, 1, Out); - __visc__return(1, Out); + __hpvm__return(1, Out); } void Func2(int *BindIn, int *SrcIn, int *Out) { - __visc__hint(CPU_TARGET); - __visc__attributes(2, BindIn, SrcIn, 1, Out); + __hpvm__hint(CPU_TARGET); + __hpvm__attributes(2, BindIn, SrcIn, 1, Out); - __visc__return(1, Out); + __hpvm__return(1, Out); } void PipeRoot(int *In, int *Out) { - __visc__hint(CPU_TARGET); + __hpvm__hint(CPU_TARGET); - __visc__attributes(1, In, 1, Out); + __hpvm__attributes(1, In, 1, Out); - void *SrcNode = __visc__createNodeND(0, Func1); - void *DestNode = __visc__createNodeND(0, Func2); + void *SrcNode = __hpvm__createNodeND(0, Func1); + void *DestNode = __hpvm__createNodeND(0, Func2); - __visc__bindIn(SrcNode, 0, 0, 0); + __hpvm__bindIn(SrcNode, 0, 0, 0); - __visc__bindIn(DestNode, 0, 0, 0); - __visc__edge(SrcNode, DestNode, 1, 0, 1, 0); + __hpvm__bindIn(DestNode, 0, 0, 0); + __hpvm__edge(SrcNode, DestNode, 1, 0, 1, 0); - __visc__bindOut(SrcNode, 0, 0, 0); + __hpvm__bindOut(SrcNode, 0, 0, 0); } int 
main(void) { @@ -41,10 +41,10 @@ int main(void) { int Out = 0; struct Root RootArgs = {(int *)&In, (int *)&Out}; - __visc__init(); - void *PipeDFG = __visc__launch(0, PipeRoot, (void *)&RootArgs); - __visc__wait(PipeDFG); - __visc__cleanup(); + __hpvm__init(); + void *PipeDFG = __hpvm__launch(0, PipeRoot, (void *)&RootArgs); + __hpvm__wait(PipeDFG); + __hpvm__cleanup(); return 0; } diff --git a/hpvm/test/unitTests/Makefile b/hpvm/test/unitTests/Makefile index 539ee5e8fbf010d33663c98470b245bb2710eeea..15580e9300a119f55e4a828b645c27dd00b62ff8 100644 --- a/hpvm/test/unitTests/Makefile +++ b/hpvm/test/unitTests/Makefile @@ -2,8 +2,8 @@ PASSES := .PHONY: clean -LLVM_INSTALL:=/home/psrivas2/Hetero/VISC/Code/trunk/llvm-install -LIBCLC:=/home/psrivas2/Hetero/VISC/Code/trunk/libclc +LLVM_INSTALL:=/home/psrivas2/Hetero/HPVM/Code/trunk/llvm-install +LIBCLC:=/home/psrivas2/Hetero/HPVM/Code/trunk/libclc HOST:=gemm_opencl KERNELS:=matrixMul LLVM_CC:=$(LLVM_INSTALL)/bin/clang diff --git a/hpvm/test/unitTests/MallocIntrinsic.c b/hpvm/test/unitTests/MallocIntrinsic.c index cfd041a991d976c24b372a81b35842598b571d89..173f6b3b16d1090a98242d345cefa330910d862d 100644 --- a/hpvm/test/unitTests/MallocIntrinsic.c +++ b/hpvm/test/unitTests/MallocIntrinsic.c @@ -1,4 +1,4 @@ -#include "visc.h" +#include "hpvm.h" #include <stdlib.h> struct Root { @@ -7,12 +7,12 @@ struct Root { }; void PipeRoot(int *In, int *Out) { - __visc__hint(CPU_TARGET); - __visc__attributes(1, In, 1, Out); + __hpvm__hint(CPU_TARGET); + __hpvm__attributes(1, In, 1, Out); - Out = (int *)__visc__malloc(*In); + Out = (int *)__hpvm__malloc(*In); - __visc__return(1, Out); + __hpvm__return(1, Out); } int main(void) { @@ -26,12 +26,12 @@ int main(void) { RootArgs->input = (int *)&In; RootArgs->output = (int *)&Out; - __visc__init(); + __hpvm__init(); - void *PipeDFG = __visc__launch(0, PipeRoot, (void *)RootArgs); - __visc__wait(PipeDFG); + void *PipeDFG = __hpvm__launch(0, PipeRoot, (void *)RootArgs); + __hpvm__wait(PipeDFG); - 
__visc__cleanup(); + __hpvm__cleanup(); return 0; } diff --git a/hpvm/test/unitTests/PipelineIntrinsics.c b/hpvm/test/unitTests/PipelineIntrinsics.c index 2a9bf83402891beddf13d96c6346e8fed924d17e..43ba0ef56cf160acb1fab6ea334732e56e0359d2 100644 --- a/hpvm/test/unitTests/PipelineIntrinsics.c +++ b/hpvm/test/unitTests/PipelineIntrinsics.c @@ -1,4 +1,4 @@ -#include "visc.h" +#include "hpvm.h" #include <stdlib.h> struct Root { @@ -7,9 +7,9 @@ struct Root { }; void PipeRoot(int *In, int *Out) { - __visc__hint(CPU_TARGET); - __visc__attributes(1, In, 1, Out); - __visc__return(1, Out); + __hpvm__hint(CPU_TARGET); + __hpvm__attributes(1, In, 1, Out); + __hpvm__return(1, Out); } int main(void) { @@ -23,12 +23,12 @@ int main(void) { RootArgs->input = (int *)&In; RootArgs->output = (int *)&Out; - __visc__init(); + __hpvm__init(); - void *PipeDFG = __visc__launch(0, PipeRoot, (void *)RootArgs); - __visc__wait(PipeDFG); + void *PipeDFG = __hpvm__launch(0, PipeRoot, (void *)RootArgs); + __hpvm__wait(PipeDFG); - __visc__cleanup(); + __hpvm__cleanup(); return 0; } diff --git a/hpvm/test/unitTests/PipelineIntrinsics.malloc.c b/hpvm/test/unitTests/PipelineIntrinsics.malloc.c index 36fc02d22b066025be4a57695265779d8e55652a..c2deed98679bf794316f283acef8e3c1db9ffa88 100644 --- a/hpvm/test/unitTests/PipelineIntrinsics.malloc.c +++ b/hpvm/test/unitTests/PipelineIntrinsics.malloc.c @@ -1,4 +1,4 @@ -#include "visc.h" +#include "hpvm.h" #include <stdlib.h> struct Root { @@ -7,24 +7,24 @@ struct Root { }; void PipeRoot(int *In, int *Out) { - __visc__hint(CPU_TARGET); - __visc__attributes(1, In, 1, Out); - __visc__return(1, Out); + __hpvm__hint(CPU_TARGET); + __hpvm__attributes(1, In, 1, Out); + __hpvm__return(1, Out); } int main(void) { int In, Out; - __visc__init(); + __hpvm__init(); struct Root *RootArgs = (struct Root *)malloc(sizeof(struct Root)); RootArgs->input = (int *)&In; RootArgs->output = (int *)&Out; - void *PipeDFG = __visc__launch(0, PipeRoot, (void *)RootArgs); - 
__visc__wait(PipeDFG); + void *PipeDFG = __hpvm__launch(0, PipeRoot, (void *)RootArgs); + __hpvm__wait(PipeDFG); - __visc__cleanup(); + __hpvm__cleanup(); return 0; } diff --git a/hpvm/test/unitTests/temp/3level.ll b/hpvm/test/unitTests/temp/3level.ll index 168e7b42322c8f7fa4be83a64cbd06d44dd9e428..2e3753f1400798d0989e2a01be78ab338205a291 100644 --- a/hpvm/test/unitTests/temp/3level.ll +++ b/hpvm/test/unitTests/temp/3level.ll @@ -1,5 +1,5 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: llvm-link %t.ll ~/current-src/projects/hpvm-rt/hpvm-rt.ll -S -o %t.linked.ll ; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/3level.ll' @@ -13,31 +13,31 @@ target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: nounwind -declare void @llvm.visc.init() #1 +declare void @llvm.hpvm.init() #1 ; Function Attrs: nounwind -declare void @llvm.visc.cleanup() #1 +declare void @llvm.hpvm.cleanup() #1 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode(i8*) #0 +declare i8* @llvm.hpvm.createNode(i8*) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*) #0 +declare i8* @llvm.hpvm.launch(i8*, i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.wait(i8*) #0 +declare void @llvm.hpvm.wait(i8*) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.getNode() #0 +declare i8* @llvm.hpvm.getNode() #0 ; Function Attrs: nounwind -declare void @llvm.visc.bind.input(i8*, i32, i32) +declare void @llvm.hpvm.bind.input(i8*, i32, i32) ; Function Attrs: nounwind -declare void @llvm.visc.bind.output(i8*, i32, 
i32) +declare void @llvm.hpvm.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { @@ -47,18 +47,18 @@ entry: %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - call void @llvm.visc.init() + call void @llvm.hpvm.init() %1 = bitcast %struct.arg* %in.addr to i32* store i32 %conv.i, i32* %1 %args = bitcast %struct.arg* %in.addr to i8* - %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 - call void @llvm.visc.wait(i8* %graphID) + call void @llvm.hpvm.wait(i8* %graphID) %2 = getelementptr %struct.arg* %in.addr, i32 0, i32 1 %outputstruct = load %rtype* %2 %output1 = extractvalue %rtype %outputstruct, 0 %output2 = extractvalue %rtype %outputstruct, 1 - call void @llvm.visc.cleanup() + call void @llvm.hpvm.cleanup() %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output1) #0 %call3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output2) #0 ret i32 0 @@ -83,21 +83,21 @@ define %rtype_internal @foo(i32 %id) { } define %rtype_internal @subNode(i32 %id) { - %foo_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype_internal (i32)* @foo to i8*)) - call void @llvm.visc.bind.input(i8* %foo_node, i32 0, i32 0) - call void @llvm.visc.bind.output(i8* %foo_node, i32 0, i32 0) + %foo_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype_internal (i32)* @foo to i8*)) + call void @llvm.hpvm.bind.input(i8* %foo_node, i32 0, i32 0) + call void @llvm.hpvm.bind.output(i8* %foo_node, i32 0, i32 0) ret %rtype_internal zeroinitializer } define %rtype @Root(i32 
%id) { - %p_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype_internal (i32)* @producer to i8*)) - %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype_internal (i32)* @consumer to i8*)) - %sub_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype_internal (i32)* @subNode to i8*)) - %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0) - call void @llvm.visc.bind.input(i8* %p_node, i32 0, i32 0) - call void @llvm.visc.bind.output(i8* %c_node, i32 0, i32 0) - call void @llvm.visc.bind.input(i8* %sub_node, i32 0, i32 0) - call void @llvm.visc.bind.output(i8* %sub_node, i32 0, i32 1) + %p_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype_internal (i32)* @producer to i8*)) + %c_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype_internal (i32)* @consumer to i8*)) + %sub_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype_internal (i32)* @subNode to i8*)) + %edge = call i8* @llvm.hpvm.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0) + call void @llvm.hpvm.bind.input(i8* %p_node, i32 0, i32 0) + call void @llvm.hpvm.bind.output(i8* %c_node, i32 0, i32 0) + call void @llvm.hpvm.bind.input(i8* %sub_node, i32 0, i32 0) + call void @llvm.hpvm.bind.output(i8* %sub_node, i32 0, i32 1) ret %rtype zeroinitializer } diff --git a/hpvm/test/unitTests/temp/Makefile b/hpvm/test/unitTests/temp/Makefile index 539ee5e8fbf010d33663c98470b245bb2710eeea..15580e9300a119f55e4a828b645c27dd00b62ff8 100644 --- a/hpvm/test/unitTests/temp/Makefile +++ b/hpvm/test/unitTests/temp/Makefile @@ -2,8 +2,8 @@ PASSES := .PHONY: clean -LLVM_INSTALL:=/home/psrivas2/Hetero/VISC/Code/trunk/llvm-install -LIBCLC:=/home/psrivas2/Hetero/VISC/Code/trunk/libclc +LLVM_INSTALL:=/home/psrivas2/Hetero/HPVM/Code/trunk/llvm-install +LIBCLC:=/home/psrivas2/Hetero/HPVM/Code/trunk/libclc HOST:=gemm_opencl KERNELS:=matrixMul LLVM_CC:=$(LLVM_INSTALL)/bin/clang diff --git a/hpvm/test/unitTests/temp/query2D.ll 
b/hpvm/test/unitTests/temp/query2D.ll index c994c2a3ff5b166b2f192f4b900982b3b7afc508..48358a3527553c8f4a31ff89454010289d02c072 100644 --- a/hpvm/test/unitTests/temp/query2D.ll +++ b/hpvm/test/unitTests/temp/query2D.ll @@ -1,5 +1,5 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: llvm-link %t.ll ~/current-src/projects/hpvm-rt/hpvm-rt.ll -S -o %t.linked.ll ; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/query2D.ll' @@ -12,46 +12,46 @@ target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: nounwind -declare void @llvm.visc.init() #1 +declare void @llvm.hpvm.init() #1 ; Function Attrs: nounwind -declare void @llvm.visc.cleanup() #1 +declare void @llvm.hpvm.cleanup() #1 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode(i8*) #0 +declare i8* @llvm.hpvm.createNode(i8*) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode1D(i8*, i32) #0 +declare i8* @llvm.hpvm.createNode1D(i8*, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode2D(i8*, i32, i32) #0 +declare i8* @llvm.hpvm.createNode2D(i8*, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*) #0 +declare i8* @llvm.hpvm.launch(i8*, i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.wait(i8*) #0 +declare void @llvm.hpvm.wait(i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.bind.input(i8*, i32, i32) +declare void @llvm.hpvm.bind.input(i8*, i32, i32) ; Function Attrs: nounwind -declare void @llvm.visc.bind.output(i8*, i32, i32) +declare void @llvm.hpvm.bind.output(i8*, 
i32, i32) ; Function Attrs: nounwind -declare i8* @llvm.visc.getNode() #0 +declare i8* @llvm.hpvm.getNode() #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.getParentNode(i8*) #0 +declare i8* @llvm.hpvm.getParentNode(i8*) #0 ; Function Attrs: nounwind -declare i32 @llvm.visc.getNumDims(i8*) #0 +declare i32 @llvm.hpvm.getNumDims(i8*) #0 ; Function Attrs: nounwind -declare i32 @llvm.visc.getNumNodeInstances.x(i8*) #0 +declare i32 @llvm.hpvm.getNumNodeInstances.x(i8*) #0 ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { @@ -61,25 +61,25 @@ entry: %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - call void @llvm.visc.init() + call void @llvm.hpvm.init() %1 = bitcast %struct.arg* %in.addr to i32* store i32 %conv.i, i32* %1 %args = bitcast %struct.arg* %in.addr to i8* - %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 - call void @llvm.visc.wait(i8* %graphID) + call void @llvm.hpvm.wait(i8* %graphID) %2 = getelementptr %struct.arg* %in.addr, i32 0, i32 1 %outputstruct = load %rtype* %2 %output = extractvalue %rtype %outputstruct, 0 - call void @llvm.visc.cleanup() + call void @llvm.hpvm.cleanup() %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 ret i32 0 } define %rtype @producer(i32 %id) { %sum = add i32 4, %id - %this_node = call i8* @llvm.visc.getNode() - %dim = call i32 @llvm.visc.getNumNodeInstances.x(i8* %this_node) + %this_node = call i8* @llvm.hpvm.getNode() + %dim = call i32 @llvm.hpvm.getNumNodeInstances.x(i8* %this_node) %sum2 = add i32 %sum, %dim %output = insertvalue %rtype undef, i32 %sum2, 0 
ret %rtype %output @@ -92,11 +92,11 @@ define %rtype @consumer(i32 %id) { } define %rtype @Root(i32 %dimension) { - %p_node = call i8* @llvm.visc.createNode2D(i8* bitcast (%rtype (i32)* @producer to i8*), i32 %dimension, i32 %dimension) - %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*)) - %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0) - call void @llvm.visc.bind.input(i8* %p_node, i32 0, i32 0) - call void @llvm.visc.bind.output(i8* %c_node, i32 0, i32 0) + %p_node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%rtype (i32)* @producer to i8*), i32 %dimension, i32 %dimension) + %c_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*)) + %edge = call i8* @llvm.hpvm.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0) + call void @llvm.hpvm.bind.input(i8* %p_node, i32 0, i32 0) + call void @llvm.hpvm.bind.output(i8* %c_node, i32 0, i32 0) ret %rtype zeroinitializer } diff --git a/hpvm/test/unitTests/temp/query3D.ll b/hpvm/test/unitTests/temp/query3D.ll index 438fe60a3bc6c2dfe718da76d55041addc47367f..d2ff16ef56628752b997577891c44fd904be4405 100644 --- a/hpvm/test/unitTests/temp/query3D.ll +++ b/hpvm/test/unitTests/temp/query3D.ll @@ -1,5 +1,5 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: llvm-link %t.ll ~/current-src/projects/hpvm-rt/hpvm-rt.ll -S -o %t.linked.ll ; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/query3D.ll' @@ -12,57 +12,57 @@ target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: nounwind -declare void @llvm.visc.init() #1 +declare void @llvm.hpvm.init() #1 ; Function Attrs: nounwind -declare void 
@llvm.visc.cleanup() #1 +declare void @llvm.hpvm.cleanup() #1 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode(i8*) #0 +declare i8* @llvm.hpvm.createNode(i8*) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode1D(i8*, i32) #0 +declare i8* @llvm.hpvm.createNode1D(i8*, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode2D(i8*, i32, i32) #0 +declare i8* @llvm.hpvm.createNode2D(i8*, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode3D(i8*, i32, i32, i32) #0 +declare i8* @llvm.hpvm.createNode3D(i8*, i32, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*) #0 +declare i8* @llvm.hpvm.launch(i8*, i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.wait(i8*) #0 +declare void @llvm.hpvm.wait(i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.bind.input(i8*, i32, i32) +declare void @llvm.hpvm.bind.input(i8*, i32, i32) ; Function Attrs: nounwind -declare void @llvm.visc.bind.output(i8*, i32, i32) +declare void @llvm.hpvm.bind.output(i8*, i32, i32) ; Function Attrs: nounwind -declare i8* @llvm.visc.getNode() #0 +declare i8* @llvm.hpvm.getNode() #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.getParentNode(i8*) #0 +declare i8* @llvm.hpvm.getParentNode(i8*) #0 ; Function Attrs: nounwind -declare i32 @llvm.visc.getNumDims(i8*) #0 +declare i32 @llvm.hpvm.getNumDims(i8*) #0 ; Function Attrs: nounwind -declare i32 @llvm.visc.getNumNodeInstances.x(i8*) #0 +declare i32 @llvm.hpvm.getNumNodeInstances.x(i8*) #0 ; Function Attrs: nounwind -declare i32 @llvm.visc.getNumNodeInstances.y(i8*) #0 +declare i32 @llvm.hpvm.getNumNodeInstances.y(i8*) #0 ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - call void @llvm.visc.init() + call void @llvm.hpvm.init() %in.addr = alloca 
%struct.arg %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 @@ -71,21 +71,21 @@ entry: %1 = bitcast %struct.arg* %in.addr to i32* store i32 %conv.i, i32* %1 %args = bitcast %struct.arg* %in.addr to i8* - %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 - call void @llvm.visc.wait(i8* %graphID) + call void @llvm.hpvm.wait(i8* %graphID) %2 = getelementptr %struct.arg* %in.addr, i32 0, i32 1 %outputstruct = load %rtype* %2 %output = extractvalue %rtype %outputstruct, 0 %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 - call void @llvm.visc.cleanup() + call void @llvm.hpvm.cleanup() ret i32 0 } define %rtype @producer(i32 %id) { %sum = add i32 4, %id - %this_node = call i8* @llvm.visc.getNode() - %dim = call i32 @llvm.visc.getNumNodeInstances.y(i8* %this_node) + %this_node = call i8* @llvm.hpvm.getNode() + %dim = call i32 @llvm.hpvm.getNumNodeInstances.y(i8* %this_node) %sum2 = add i32 %sum, %dim %output = insertvalue %rtype undef, i32 %sum2, 0 ret %rtype %output @@ -98,11 +98,11 @@ define %rtype @consumer(i32 %id) { } define %rtype @Root(i32 %dimension) { - %p_node = call i8* @llvm.visc.createNode3D(i8* bitcast (%rtype (i32)* @producer to i8*), i32 %dimension, i32 10, i32 30) - %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*)) - %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0) - call void @llvm.visc.bind.input(i8* %p_node, i32 0, i32 0) - call void @llvm.visc.bind.output(i8* %c_node, i32 0, i32 0) + %p_node = call i8* @llvm.hpvm.createNode3D(i8* bitcast (%rtype (i32)* @producer to i8*), i32 %dimension, i32 10, i32 30) + 
%c_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*)) + %edge = call i8* @llvm.hpvm.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0) + call void @llvm.hpvm.bind.input(i8* %p_node, i32 0, i32 0) + call void @llvm.hpvm.bind.output(i8* %c_node, i32 0, i32 0) ret %rtype zeroinitializer } diff --git a/hpvm/test/unitTests/temp/queryNodeInst.ll b/hpvm/test/unitTests/temp/queryNodeInst.ll index 24d6a3f0d30e6661c0f1396e082f889d54dc50be..4e3dd7553045d466199c726416db220a6be2d1aa 100644 --- a/hpvm/test/unitTests/temp/queryNodeInst.ll +++ b/hpvm/test/unitTests/temp/queryNodeInst.ll @@ -1,5 +1,5 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: llvm-link %t.ll ~/current-src/projects/hpvm-rt/hpvm-rt.ll -S -o %t.linked.ll ; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll' @@ -12,40 +12,40 @@ target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: nounwind -declare void @llvm.visc.init() #1 +declare void @llvm.hpvm.init() #1 ; Function Attrs: nounwind -declare void @llvm.visc.cleanup() #1 +declare void @llvm.hpvm.cleanup() #1 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode(i8*) #0 +declare i8* @llvm.hpvm.createNode(i8*) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*) #0 +declare i8* @llvm.hpvm.launch(i8*, i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.wait(i8*) #0 +declare void @llvm.hpvm.wait(i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.bind.input(i8*, i32, i32) +declare void 
@llvm.hpvm.bind.input(i8*, i32, i32) ; Function Attrs: nounwind -declare void @llvm.visc.bind.output(i8*, i32, i32) +declare void @llvm.hpvm.bind.output(i8*, i32, i32) ; Function Attrs: nounwind -declare i8* @llvm.visc.getNode() #0 +declare i8* @llvm.hpvm.getNode() #0 ; Function Attrs: nounwind -declare i32 @llvm.visc.getNumDims(i8*) #0 +declare i32 @llvm.hpvm.getNumDims(i8*) #0 ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - call void @llvm.visc.init() + call void @llvm.hpvm.init() %in.addr = alloca %struct.arg %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 @@ -54,21 +54,21 @@ entry: %1 = bitcast %struct.arg* %in.addr to i32* store i32 %conv.i, i32* %1 %args = bitcast %struct.arg* %in.addr to i8* - %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 - call void @llvm.visc.wait(i8* %graphID) + call void @llvm.hpvm.wait(i8* %graphID) %2 = getelementptr %struct.arg* %in.addr, i32 0, i32 1 %outputstruct = load %rtype* %2 %output = extractvalue %rtype %outputstruct, 0 %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 - call void @llvm.visc.cleanup() + call void @llvm.hpvm.cleanup() ret i32 0 } define %rtype @producer(i32 %id) { %sum = add i32 4, %id - %this_node = call i8* @llvm.visc.getNode() - %numDim = call i32 @llvm.visc.getNumDims(i8* %this_node) + %this_node = call i8* @llvm.hpvm.getNode() + %numDim = call i32 @llvm.hpvm.getNumDims(i8* %this_node) %sum2 = add i32 %sum, %numDim %output = insertvalue %rtype undef, i32 %sum, 0 ret %rtype %output @@ -81,11 +81,11 @@ define %rtype @consumer(i32 %id) { } define %rtype @Root(i32 %id) { - %p_node = call i8* 
@llvm.visc.createNode(i8* bitcast (%rtype (i32)* @producer to i8*)) - %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*)) - %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0) - call void @llvm.visc.bind.input(i8* %p_node, i32 0, i32 0) - call void @llvm.visc.bind.output(i8* %c_node, i32 0, i32 0) + %p_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype (i32)* @producer to i8*)) + %c_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*)) + %edge = call i8* @llvm.hpvm.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0) + call void @llvm.hpvm.bind.input(i8* %p_node, i32 0, i32 0) + call void @llvm.hpvm.bind.output(i8* %c_node, i32 0, i32 0) ret %rtype zeroinitializer } diff --git a/hpvm/test/unitTests/temp/queryNumDim.ll b/hpvm/test/unitTests/temp/queryNumDim.ll index 500e2ff41bd52f29a56cfd49563927bf6323482b..caa0978dabab0bf6295853e35f23e3ed68f00840 100644 --- a/hpvm/test/unitTests/temp/queryNumDim.ll +++ b/hpvm/test/unitTests/temp/queryNumDim.ll @@ -1,5 +1,5 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: llvm-link %t.ll ~/current-src/projects/hpvm-rt/hpvm-rt.ll -S -o %t.linked.ll ; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll' @@ -12,42 +12,42 @@ target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: nounwind -declare void @llvm.visc.init() #1 +declare void @llvm.hpvm.init() #1 ; Function Attrs: nounwind -declare void @llvm.visc.cleanup() #1 +declare void @llvm.hpvm.cleanup() #1 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode(i8*) #0 +declare i8* @llvm.hpvm.createNode(i8*) #0 ; Function Attrs: nounwind 
-declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*) #0 +declare i8* @llvm.hpvm.launch(i8*, i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.wait(i8*) #0 +declare void @llvm.hpvm.wait(i8*) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.getNode() #0 +declare i8* @llvm.hpvm.getNode() #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.getParentNode(i8*) #0 +declare i8* @llvm.hpvm.getParentNode(i8*) #0 ; Function Attrs: nounwind -declare i32 @llvm.visc.getNumDims(i8*) #0 +declare i32 @llvm.hpvm.getNumDims(i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.bind.input(i8*, i32, i32) +declare void @llvm.hpvm.bind.input(i8*, i32, i32) ; Function Attrs: nounwind -declare void @llvm.visc.bind.output(i8*, i32, i32) +declare void @llvm.hpvm.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - call void @llvm.visc.init() + call void @llvm.hpvm.init() %in.addr = alloca %struct.arg %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 @@ -56,21 +56,21 @@ entry: %1 = bitcast %struct.arg* %in.addr to i32* store i32 %conv.i, i32* %1 %args = bitcast %struct.arg* %in.addr to i8* - %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 - call void @llvm.visc.wait(i8* %graphID) + call void @llvm.hpvm.wait(i8* %graphID) %2 = getelementptr %struct.arg* %in.addr, i32 0, i32 1 %outputstruct = load %rtype* %2 %output = extractvalue %rtype %outputstruct, 0 %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) 
#0 - call void @llvm.visc.cleanup() + call void @llvm.hpvm.cleanup() ret i32 0 } define %rtype @producer(i32 %id) { %sum = add i32 4, %id - %this_node = call i8* @llvm.visc.getNode() - %numDim = call i32 @llvm.visc.getNumDims(i8* %this_node) + %this_node = call i8* @llvm.hpvm.getNode() + %numDim = call i32 @llvm.hpvm.getNumDims(i8* %this_node) %sum2 = add i32 %sum, %numDim %output = insertvalue %rtype undef, i32 %sum, 0 ret %rtype %output @@ -83,11 +83,11 @@ define %rtype @consumer(i32 %id) { } define %rtype @Root(i32 %id) { - %p_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @producer to i8*)) - %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*)) - %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0) - call void @llvm.visc.bind.input(i8* %p_node, i32 0, i32 0) - call void @llvm.visc.bind.output(i8* %c_node, i32 0, i32 0) + %p_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype (i32)* @producer to i8*)) + %c_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*)) + %edge = call i8* @llvm.hpvm.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0) + call void @llvm.hpvm.bind.input(i8* %p_node, i32 0, i32 0) + call void @llvm.hpvm.bind.output(i8* %c_node, i32 0, i32 0) ret %rtype zeroinitializer } diff --git a/hpvm/test/unitTests/temp/queryNumNodeInst.ll b/hpvm/test/unitTests/temp/queryNumNodeInst.ll index 48add92f16125bdf33c9691896a8b7259339fe78..07418ff725c277e2e8adbe6a39d8831e2b77bc59 100644 --- a/hpvm/test/unitTests/temp/queryNumNodeInst.ll +++ b/hpvm/test/unitTests/temp/queryNumNodeInst.ll @@ -1,5 +1,5 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: llvm-link %t.ll ~/current-src/projects/hpvm-rt/hpvm-rt.ll -S -o %t.linked.ll ; RUN: clang++ -O3 %t.linked.ll -lpthread 
-lOpenCL -lrt -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll' @@ -12,48 +12,48 @@ target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: nounwind -declare void @llvm.visc.init() #1 +declare void @llvm.hpvm.init() #1 ; Function Attrs: nounwind -declare void @llvm.visc.cleanup() #1 +declare void @llvm.hpvm.cleanup() #1 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode(i8*) #0 +declare i8* @llvm.hpvm.createNode(i8*) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode1D(i8*, i32) #0 +declare i8* @llvm.hpvm.createNode1D(i8*, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*) #0 +declare i8* @llvm.hpvm.launch(i8*, i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.wait(i8*) #0 +declare void @llvm.hpvm.wait(i8*) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.getNode() #0 +declare i8* @llvm.hpvm.getNode() #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.getParentNode(i8*) #0 +declare i8* @llvm.hpvm.getParentNode(i8*) #0 ; Function Attrs: nounwind -declare i32 @llvm.visc.getNumDims(i8*) #0 +declare i32 @llvm.hpvm.getNumDims(i8*) #0 ; Function Attrs: nounwind -declare i32 @llvm.visc.getNumNodeInstances.x(i8*) #0 +declare i32 @llvm.hpvm.getNumNodeInstances.x(i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.bind.input(i8*, i32, i32) +declare void @llvm.hpvm.bind.input(i8*, i32, i32) ; Function Attrs: nounwind -declare void @llvm.visc.bind.output(i8*, i32, i32) +declare void @llvm.hpvm.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - call void @llvm.visc.init() + call void @llvm.hpvm.init() %in.addr = alloca %struct.arg %arrayidx = getelementptr inbounds i8** 
%argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 @@ -62,21 +62,21 @@ entry: %1 = bitcast %struct.arg* %in.addr to i32* store i32 %conv.i, i32* %1 %args = bitcast %struct.arg* %in.addr to i8* - %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 - call void @llvm.visc.wait(i8* %graphID) + call void @llvm.hpvm.wait(i8* %graphID) %2 = getelementptr %struct.arg* %in.addr, i32 0, i32 1 %outputstruct = load %rtype* %2 %output = extractvalue %rtype %outputstruct, 0 %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 - call void @llvm.visc.cleanup() + call void @llvm.hpvm.cleanup() ret i32 0 } define %rtype @producer(i32 %id) { %sum = add i32 4, %id - %this_node = call i8* @llvm.visc.getNode() - %dim = call i32 @llvm.visc.getNumNodeInstances.x(i8* %this_node) + %this_node = call i8* @llvm.hpvm.getNode() + %dim = call i32 @llvm.hpvm.getNumNodeInstances.x(i8* %this_node) %sum2 = add i32 %sum, %dim %output = insertvalue %rtype undef, i32 %sum2, 0 ret %rtype %output @@ -89,11 +89,11 @@ define %rtype @consumer(i32 %id) { } define %rtype @Root(i32 %dimension) { - %p_node = call i8* @llvm.visc.createNode1D(i8* bitcast (%rtype (i32)* @producer to i8*), i32 %dimension) - %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*)) - %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0) - call void @llvm.visc.bind.input(i8* %p_node, i32 0, i32 0) - call void @llvm.visc.bind.output(i8* %c_node, i32 0, i32 0) + %p_node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%rtype (i32)* @producer to i8*), i32 %dimension) + %c_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*)) + 
%edge = call i8* @llvm.hpvm.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0) + call void @llvm.hpvm.bind.input(i8* %p_node, i32 0, i32 0) + call void @llvm.hpvm.bind.output(i8* %c_node, i32 0, i32 0) ret %rtype zeroinitializer } diff --git a/hpvm/test/unitTests/temp/singleNode.ll b/hpvm/test/unitTests/temp/singleNode.ll index 20713e955fb457acec2e2968d1b4a2ae61396fe0..99e53181317a6b27a83916682bcf1457895c0bfc 100644 --- a/hpvm/test/unitTests/temp/singleNode.ll +++ b/hpvm/test/unitTests/temp/singleNode.ll @@ -1,5 +1,5 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: llvm-link %t.ll ~/current-src/projects/hpvm-rt/hpvm-rt.ll -S -o %t.linked.ll ; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/singleNode.ll' @@ -12,43 +12,43 @@ target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: nounwind -declare void @llvm.visc.init() #1 +declare void @llvm.hpvm.init() #1 ; Function Attrs: nounwind -declare void @llvm.visc.cleanup() #1 +declare void @llvm.hpvm.cleanup() #1 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode(i8*) #0 +declare i8* @llvm.hpvm.createNode(i8*) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*) #0 +declare i8* @llvm.hpvm.launch(i8*, i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.wait(i8*) #0 +declare void @llvm.hpvm.wait(i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.bind.input(i8*, i32, i32) +declare void @llvm.hpvm.bind.input(i8*, i32, i32) ; Function Attrs: nounwind -declare void @llvm.visc.bind.output(i8*, i32, i32) 
+declare void @llvm.hpvm.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - call void @llvm.visc.init() + call void @llvm.hpvm.init() %in.addr = alloca %struct.arg %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 %args = bitcast %struct.arg* %in.addr to i8* - %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype ()* @Root to i8*), i8* %args) + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%rtype ()* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 - call void @llvm.visc.wait(i8* %graphID) - call void @llvm.visc.cleanup() + call void @llvm.hpvm.wait(i8* %graphID) + call void @llvm.hpvm.cleanup() ret i32 0 } @@ -59,8 +59,8 @@ define %rtype @foo() { } define %rtype @Root() { - %node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype ()* @foo to i8*)) - call void @llvm.visc.bind.output(i8* %node, i32 0, i32 0) + %node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype ()* @foo to i8*)) + call void @llvm.hpvm.bind.output(i8* %node, i32 0, i32 0) ret %rtype zeroinitializer } diff --git a/hpvm/test/unitTests/temp/singleNodeStream.ll b/hpvm/test/unitTests/temp/singleNodeStream.ll index fce75df6714240286e9a676e40e37c3f14e537a6..aa0243603c420a21f51f9842d467f9da814f1814 100644 --- a/hpvm/test/unitTests/temp/singleNodeStream.ll +++ b/hpvm/test/unitTests/temp/singleNodeStream.ll @@ -1,5 +1,5 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: llvm-link %t.ll ~/current-src/projects/hpvm-rt/hpvm-rt.ll -S -o %t.linked.ll ; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin 
; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNodeConnect.ll' @@ -14,39 +14,39 @@ target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: nounwind -declare void @llvm.visc.init() #1 +declare void @llvm.hpvm.init() #1 ; Function Attrs: nounwind -declare void @llvm.visc.cleanup() #1 +declare void @llvm.hpvm.cleanup() #1 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode(i8*) #0 +declare i8* @llvm.hpvm.createNode(i8*) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32, i1) #0 +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i1) #0 +declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #0 ; Function Attrs: nounwind -declare void @llvm.visc.push(i8*, i8*) #0 +declare void @llvm.hpvm.push(i8*, i8*) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.pop(i8*) #0 +declare i8* @llvm.hpvm.pop(i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.wait(i8*) #0 +declare void @llvm.hpvm.wait(i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.bind.input(i8*, i32, i32, i1) +declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) ; Function Attrs: nounwind -declare void @llvm.visc.bind.output(i8*, i32, i32, i1) +declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - call void @llvm.visc.init() + call void @llvm.hpvm.init() %in.addr = alloca %struct.arg %num = alloca i32 %arrayidx = getelementptr inbounds i8** %argv, i64 1 @@ -60,27 +60,27 @@ entry: %args = bitcast %struct.arg* %in.addr to i8* ; Launch the pipeline - %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rptype (i32*, i64)* @Root to i8*), i8* %args, i1 1) + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%rptype (i32*, i64)* @Root to i8*), i8* %args, i1 1) ; Push arguments into the 
pipeline - call void @llvm.visc.push(i8* %graphID, i8* %args) - call void @llvm.visc.push(i8* %graphID, i8* %args) - call void @llvm.visc.push(i8* %graphID, i8* %args) - call void @llvm.visc.push(i8* %graphID, i8* %args) + call void @llvm.hpvm.push(i8* %graphID, i8* %args) + call void @llvm.hpvm.push(i8* %graphID, i8* %args) + call void @llvm.hpvm.push(i8* %graphID, i8* %args) + call void @llvm.hpvm.push(i8* %graphID, i8* %args) ; Pop out arguments and read the output - %graph_output = call i8* @llvm.visc.pop(i8* %graphID) - %graph_output1 = call i8* @llvm.visc.pop(i8* %graphID) - %graph_output2 = call i8* @llvm.visc.pop(i8* %graphID) - %graph_output3 = call i8* @llvm.visc.pop(i8* %graphID) + %graph_output = call i8* @llvm.hpvm.pop(i8* %graphID) + %graph_output1 = call i8* @llvm.hpvm.pop(i8* %graphID) + %graph_output2 = call i8* @llvm.hpvm.pop(i8* %graphID) + %graph_output3 = call i8* @llvm.hpvm.pop(i8* %graphID) %output.addr = bitcast i8* %graph_output to %rptype* %outputstruct = load %rptype* %output.addr %output = extractvalue %rptype %outputstruct, 0 %output_val = load i32* %output %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @out.str, i64 0, i64 0), i32 %output_val) #0 - call void @llvm.visc.wait(i8* %graphID) - call void @llvm.visc.cleanup() + call void @llvm.hpvm.wait(i8* %graphID) + call void @llvm.hpvm.cleanup() ret i32 0 } @@ -95,11 +95,11 @@ define %rptype @producer(i32* %id, i64 %size) { } define %rptype @Root(i32* %id, i64 %size) { - %p_node = call i8* @llvm.visc.createNode(i8* bitcast (%rptype (i32*, i64)* @producer to i8*)) - call void @llvm.visc.bind.input(i8* %p_node, i32 0, i32 0, i1 1) - call void @llvm.visc.bind.input(i8* %p_node, i32 1, i32 1, i1 1) - call void @llvm.visc.bind.output(i8* %p_node, i32 0, i32 0, i1 1) - call void @llvm.visc.bind.output(i8* %p_node, i32 1, i32 1, i1 1) + %p_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rptype (i32*, i64)* @producer to i8*)) + call void 
@llvm.hpvm.bind.input(i8* %p_node, i32 0, i32 0, i1 1) + call void @llvm.hpvm.bind.input(i8* %p_node, i32 1, i32 1, i1 1) + call void @llvm.hpvm.bind.output(i8* %p_node, i32 0, i32 0, i1 1) + call void @llvm.hpvm.bind.output(i8* %p_node, i32 1, i32 1, i1 1) ret %rptype zeroinitializer } diff --git a/hpvm/test/unitTests/temp/twoLaunch.ll b/hpvm/test/unitTests/temp/twoLaunch.ll index 48c973a7e6f1cc5422fffd8d9e4ae0a0e1a06bf9..ee602f58d82f004a7b19bf54e55e1c0759c17bef 100644 --- a/hpvm/test/unitTests/temp/twoLaunch.ll +++ b/hpvm/test/unitTests/temp/twoLaunch.ll @@ -1,5 +1,5 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: llvm-link %t.ll ~/current-src/projects/hpvm-rt/hpvm-rt.ll -S -o %t.linked.ll ; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/singleNode.ll' @@ -12,33 +12,33 @@ target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: nounwind -declare void @llvm.visc.init() #1 +declare void @llvm.hpvm.init() #1 ; Function Attrs: nounwind -declare void @llvm.visc.cleanup() #1 +declare void @llvm.hpvm.cleanup() #1 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode(i8*) #0 +declare i8* @llvm.hpvm.createNode(i8*) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*) #0 +declare i8* @llvm.hpvm.launch(i8*, i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.wait(i8*) #0 +declare void @llvm.hpvm.wait(i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.bind.input(i8*, i32, i32) +declare void @llvm.hpvm.bind.input(i8*, i32, i32) ; Function Attrs: nounwind -declare 
void @llvm.visc.bind.output(i8*, i32, i32) +declare void @llvm.hpvm.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - call void @llvm.visc.init() + call void @llvm.hpvm.init() %in.addr_1 = alloca %struct.arg %in.addr_2= alloca %struct.arg %arrayidx = getelementptr inbounds i8** %argv, i64 1 @@ -47,12 +47,12 @@ entry: %conv.i = trunc i64 %call.i to i32 %args_1 = bitcast %struct.arg* %in.addr_1 to i8* %args_2 = bitcast %struct.arg* %in.addr_2 to i8* - %graphID_1 = call i8* @llvm.visc.launch(i8* bitcast (%rtype ()* @Root_1 to i8*), i8* %args_1) - %graphID_2 = call i8* @llvm.visc.launch(i8* bitcast (%rtype ()* @Root_2 to i8*), i8* %args_2) + %graphID_1 = call i8* @llvm.hpvm.launch(i8* bitcast (%rtype ()* @Root_1 to i8*), i8* %args_1) + %graphID_2 = call i8* @llvm.hpvm.launch(i8* bitcast (%rtype ()* @Root_2 to i8*), i8* %args_2) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 - call void @llvm.visc.wait(i8* %graphID_1) - call void @llvm.visc.wait(i8* %graphID_2) - call void @llvm.visc.cleanup() + call void @llvm.hpvm.wait(i8* %graphID_1) + call void @llvm.hpvm.wait(i8* %graphID_2) + call void @llvm.hpvm.cleanup() ret i32 0 } @@ -70,14 +70,14 @@ define %rtype @foo_2() { } define %rtype @Root_1() { - %node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype ()* @foo_1 to i8*)) - call void @llvm.visc.bind.output(i8* %node, i32 0, i32 0) + %node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype ()* @foo_1 to i8*)) + call void @llvm.hpvm.bind.output(i8* %node, i32 0, i32 0) ret %rtype zeroinitializer } define %rtype @Root_2() { - %node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype ()* @foo_2 to i8*)) - call void @llvm.visc.bind.output(i8* %node, i32 0, i32 0) + %node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype ()* @foo_2 to i8*)) + call void @llvm.hpvm.bind.output(i8* %node, i32 0, i32 0) ret %rtype 
zeroinitializer } diff --git a/hpvm/test/unitTests/temp/twoNode.ll b/hpvm/test/unitTests/temp/twoNode.ll index 5e2899830b835ff50c9d2d8e4157451d4bd26f7f..74e4c64d599f7204b375743687c6da2b7ed8c9f6 100644 --- a/hpvm/test/unitTests/temp/twoNode.ll +++ b/hpvm/test/unitTests/temp/twoNode.ll @@ -1,5 +1,5 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: llvm-link %t.ll ~/current-src/projects/hpvm-rt/hpvm-rt.ll -S -o %t.linked.ll ; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll' @@ -11,33 +11,33 @@ target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: nounwind -declare void @llvm.visc.init() #1 +declare void @llvm.hpvm.init() #1 ; Function Attrs: nounwind -declare void @llvm.visc.cleanup() #1 +declare void @llvm.hpvm.cleanup() #1 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode(i8*) #0 +declare i8* @llvm.hpvm.createNode(i8*) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*) #0 +declare i8* @llvm.hpvm.launch(i8*, i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.wait(i8*) #0 +declare void @llvm.hpvm.wait(i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.bind.input(i8*, i32, i32) +declare void @llvm.hpvm.bind.input(i8*, i32, i32) ; Function Attrs: nounwind -declare void @llvm.visc.bind.output(i8*, i32, i32) +declare void @llvm.hpvm.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - call void @llvm.visc.init() + call void @llvm.hpvm.init() %in.addr = alloca %struct.arg 
%arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 @@ -46,10 +46,10 @@ entry: %1 = bitcast %struct.arg* %in.addr to i32* store i32 %conv.i, i32* %1 %args = bitcast %struct.arg* %in.addr to i8* - %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 - call void @llvm.visc.wait(i8* %graphID) - call void @llvm.visc.cleanup() + call void @llvm.hpvm.wait(i8* %graphID) + call void @llvm.hpvm.cleanup() ret i32 0 } @@ -66,10 +66,10 @@ define %rtype @consumer(i32 %id) { } define %rtype @Root(i32 %id) { - %p_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype ()* @producer to i8*)) - %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*)) - %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0) - call void @llvm.visc.bind.output(i8* %c_node, i32 0, i32 0) + %p_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype ()* @producer to i8*)) + %c_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*)) + %edge = call i8* @llvm.hpvm.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0) + call void @llvm.hpvm.bind.output(i8* %c_node, i32 0, i32 0) ret %rtype zeroinitializer } diff --git a/hpvm/test/unitTests/temp/twoNodeConnect.ll b/hpvm/test/unitTests/temp/twoNodeConnect.ll index 06652b94e02c2cac66ab4a07e88dec0a04da49f8..6b23ad691bacb42c39fe681967d4c584179644f1 100644 --- a/hpvm/test/unitTests/temp/twoNodeConnect.ll +++ b/hpvm/test/unitTests/temp/twoNodeConnect.ll @@ -1,5 +1,5 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; 
RUN: llvm-link %t.ll ~/current-src/projects/hpvm-rt/hpvm-rt.ll -S -o %t.linked.ll ; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNodeConnect.ll' @@ -11,33 +11,33 @@ target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: nounwind -declare void @llvm.visc.init() #1 +declare void @llvm.hpvm.init() #1 ; Function Attrs: nounwind -declare void @llvm.visc.cleanup() #1 +declare void @llvm.hpvm.cleanup() #1 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode(i8*) #0 +declare i8* @llvm.hpvm.createNode(i8*) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*) #0 +declare i8* @llvm.hpvm.launch(i8*, i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.wait(i8*) #0 +declare void @llvm.hpvm.wait(i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.bind.input(i8*, i32, i32) +declare void @llvm.hpvm.bind.input(i8*, i32, i32) ; Function Attrs: nounwind -declare void @llvm.visc.bind.output(i8*, i32, i32) +declare void @llvm.hpvm.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - call void @llvm.visc.init() + call void @llvm.hpvm.init() %in.addr = alloca %struct.arg %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 @@ -46,14 +46,14 @@ entry: %1 = bitcast %struct.arg* %in.addr to i32* store i32 %conv.i, i32* %1 %args = bitcast %struct.arg* %in.addr to i8* - %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x 
i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 - call void @llvm.visc.wait(i8* %graphID) + call void @llvm.hpvm.wait(i8* %graphID) %2 = getelementptr %struct.arg* %in.addr, i32 0, i32 1 %outputstruct = load %rtype* %2 %output = extractvalue %rtype %outputstruct, 0 %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 - call void @llvm.visc.cleanup() + call void @llvm.hpvm.cleanup() ret i32 0 } @@ -70,11 +70,11 @@ define %rtype @consumer(i32 %id) { } define %rtype @Root(i32 %id) { - %p_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @producer to i8*)) - %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*)) - %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0) - call void @llvm.visc.bind.input(i8* %p_node, i32 0, i32 0) - call void @llvm.visc.bind.output(i8* %c_node, i32 0, i32 0) + %p_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype (i32)* @producer to i8*)) + %c_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*)) + %edge = call i8* @llvm.hpvm.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0) + call void @llvm.hpvm.bind.input(i8* %p_node, i32 0, i32 0) + call void @llvm.hpvm.bind.output(i8* %c_node, i32 0, i32 0) ret %rtype zeroinitializer } diff --git a/hpvm/test/unitTests/temp/twoNodeQuery.ll b/hpvm/test/unitTests/temp/twoNodeQuery.ll index 2e1ea0dba4659d92b9c1b0600732748c87571671..247d1830dadff69ac5380b939d26c5f850bc08ac 100644 --- a/hpvm/test/unitTests/temp/twoNodeQuery.ll +++ b/hpvm/test/unitTests/temp/twoNodeQuery.ll @@ -1,5 +1,5 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: llvm-link %t.ll ~/current-src/projects/hpvm-rt/hpvm-rt.ll -S -o %t.linked.ll ; RUN: clang++ -O3 %t.linked.ll -lpthread 
-lOpenCL -lrt -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNodeQuery.ll' @@ -11,42 +11,42 @@ target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: nounwind -declare void @llvm.visc.init() #1 +declare void @llvm.hpvm.init() #1 ; Function Attrs: nounwind -declare void @llvm.visc.cleanup() #1 +declare void @llvm.hpvm.cleanup() #1 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode(i8*) #0 +declare i8* @llvm.hpvm.createNode(i8*) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*) #0 +declare i8* @llvm.hpvm.launch(i8*, i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.wait(i8*) #0 +declare void @llvm.hpvm.wait(i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.bind.input(i8*, i32, i32) +declare void @llvm.hpvm.bind.input(i8*, i32, i32) ; Function Attrs: nounwind -declare void @llvm.visc.bind.output(i8*, i32, i32) +declare void @llvm.hpvm.bind.output(i8*, i32, i32) ; Function Attrs: nounwind -declare i8* @llvm.visc.getNode() #0 +declare i8* @llvm.hpvm.getNode() #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.getParentNode(i8*) #0 +declare i8* @llvm.hpvm.getParentNode(i8*) #0 ; Function Attrs: nounwind -declare i32 @llvm.visc.getNumDims(i8*) #0 +declare i32 @llvm.hpvm.getNumDims(i8*) #0 ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - call void @llvm.visc.init() + call void @llvm.hpvm.init() %in.addr = alloca %struct.arg %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 @@ -55,21 +55,21 @@ entry: %1 = bitcast %struct.arg* %in.addr to i32* store i32 %conv.i, i32* %1 %args = bitcast %struct.arg* %in.addr to i8* - %graphID = call i8* @llvm.visc.launch(i8* bitcast 
(%rtype (i32)* @Root to i8*), i8* %args) + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 - call void @llvm.visc.wait(i8* %graphID) + call void @llvm.hpvm.wait(i8* %graphID) %2 = getelementptr %struct.arg* %in.addr, i32 0, i32 1 %outputstruct = load %rtype* %2 %output = extractvalue %rtype %outputstruct, 0 %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 - call void @llvm.visc.cleanup() + call void @llvm.hpvm.cleanup() ret i32 0 } define %rtype @producer(i32 %id) { %sum = add i32 4, %id - %this_node = call i8* @llvm.visc.getNode() - %numDim = call i32 @llvm.visc.getNumDims(i8* %this_node) + %this_node = call i8* @llvm.hpvm.getNode() + %numDim = call i32 @llvm.hpvm.getNumDims(i8* %this_node) %sum2 = add i32 %sum, %numDim %output = insertvalue %rtype undef, i32 %sum, 0 ret %rtype %output @@ -82,11 +82,11 @@ define %rtype @consumer(i32 %id) { } define %rtype @Root(i32 %id) { - %p_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @producer to i8*)) - %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*)) - %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0) - call void @llvm.visc.bind.input(i8* %p_node, i32 0, i32 0) - call void @llvm.visc.bind.output(i8* %c_node, i32 0, i32 0) + %p_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype (i32)* @producer to i8*)) + %c_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*)) + %edge = call i8* @llvm.hpvm.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0) + call void @llvm.hpvm.bind.input(i8* %p_node, i32 0, i32 0) + call void @llvm.hpvm.bind.output(i8* %c_node, i32 0, i32 0) ret %rtype zeroinitializer } diff --git a/hpvm/test/unitTests/temp/twoNodeStream.ll 
b/hpvm/test/unitTests/temp/twoNodeStream.ll index 6e9925951884775e7ba60bb396a97fd9bc0ef52d..f9820abd19eb7b329b2c7184719d9699b15891e6 100644 --- a/hpvm/test/unitTests/temp/twoNodeStream.ll +++ b/hpvm/test/unitTests/temp/twoNodeStream.ll @@ -1,5 +1,5 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: llvm-link %t.ll ~/current-src/projects/hpvm-rt/hpvm-rt.ll -S -o %t.linked.ll ; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNodeConnect.ll' @@ -14,39 +14,39 @@ target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: nounwind -declare void @llvm.visc.init() #1 +declare void @llvm.hpvm.init() #1 ; Function Attrs: nounwind -declare void @llvm.visc.cleanup() #1 +declare void @llvm.hpvm.cleanup() #1 ; Function Attrs: nounwind -declare i8* @llvm.visc.createNode(i8*) #0 +declare i8* @llvm.hpvm.createNode(i8*) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32, i1) #0 +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i1) #0 +declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #0 ; Function Attrs: nounwind -declare void @llvm.visc.push(i8*, i8*) #0 +declare void @llvm.hpvm.push(i8*, i8*) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.pop(i8*) #0 +declare i8* @llvm.hpvm.pop(i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.wait(i8*) #0 +declare void @llvm.hpvm.wait(i8*) #0 ; Function Attrs: nounwind -declare void @llvm.visc.bind.input(i8*, i32, i32, i1) +declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) ; Function Attrs: nounwind -declare void @llvm.visc.bind.output(i8*, i32, i32, i1) +declare void @llvm.hpvm.bind.output(i8*, 
i32, i32, i1) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - call void @llvm.visc.init() + call void @llvm.hpvm.init() %in.addr = alloca %struct.arg %num = alloca i32 %arrayidx = getelementptr inbounds i8** %argv, i64 1 @@ -60,21 +60,21 @@ entry: %args = bitcast %struct.arg* %in.addr to i8* ; Launch the pipeline - %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rctype (i32*, i64)* @Root to i8*), i8* %args, i1 1) + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%rctype (i32*, i64)* @Root to i8*), i8* %args, i1 1) ; Push arguments into the pipeline - call void @llvm.visc.push(i8* %graphID, i8* %args) + call void @llvm.hpvm.push(i8* %graphID, i8* %args) ; Pop out arguments and read the output - %graph_output = call i8* @llvm.visc.pop(i8* %graphID) + %graph_output = call i8* @llvm.hpvm.pop(i8* %graphID) %output.addr = bitcast i8* %graph_output to %rctype* %outputstruct = load %rctype* %output.addr %output = extractvalue %rctype %outputstruct, 0 %output_val = load i32* %output %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @out.str, i64 0, i64 0), i32 %output_val) #0 - call void @llvm.visc.wait(i8* %graphID) - call void @llvm.visc.cleanup() + call void @llvm.hpvm.wait(i8* %graphID) + call void @llvm.hpvm.cleanup() ret i32 0 } @@ -97,14 +97,14 @@ define %rctype @consumer(i32* %id, i64 %size) { } define %rctype @Root(i32* %id, i64 %size) { - %p_node = call i8* @llvm.visc.createNode(i8* bitcast (%rptype (i32*, i64)* @producer to i8*)) - %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rctype (i32*, i64)* @consumer to i8*)) - %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0, i1 1) - %edge2 = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 1, i32 1, i1 1) - call void @llvm.visc.bind.input(i8* %p_node, i32 0, i32 0, i1 1) - call void @llvm.visc.bind.input(i8* %p_node, i32 1, i32 1, i1 0) - call void 
@llvm.visc.bind.output(i8* %c_node, i32 0, i32 0, i1 1) - call void @llvm.visc.bind.output(i8* %c_node, i32 1, i32 1, i1 1) + %p_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rptype (i32*, i64)* @producer to i8*)) + %c_node = call i8* @llvm.hpvm.createNode(i8* bitcast (%rctype (i32*, i64)* @consumer to i8*)) + %edge = call i8* @llvm.hpvm.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0, i1 1) + %edge2 = call i8* @llvm.hpvm.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 1, i32 1, i1 1) + call void @llvm.hpvm.bind.input(i8* %p_node, i32 0, i32 0, i1 1) + call void @llvm.hpvm.bind.input(i8* %p_node, i32 1, i32 1, i1 0) + call void @llvm.hpvm.bind.output(i8* %c_node, i32 0, i32 0, i1 1) + call void @llvm.hpvm.bind.output(i8* %c_node, i32 1, i32 1, i1 1) ret %rctype zeroinitializer }