diff --git a/lib/DFG2LLVM_PROMISE/CMakeLists.txt b/lib/DFG2LLVM_PROMISE/CMakeLists.txt deleted file mode 100644 index 5b5d2677d0e827d590a07898208f00f9392b62a5..0000000000000000000000000000000000000000 --- a/lib/DFG2LLVM_PROMISE/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -if(WIN32 OR CYGWIN) - set(LLVM_LINK_COMPONENTS Core Support) -endif() - -add_llvm_loadable_module( LLVMDFG2LLVM_PROMISE - DFG2LLVM_PROMISE.cpp - - DEPENDS - intrinsics_gen - PLUGIN_TOOL - opt - ) diff --git a/lib/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.cpp b/lib/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.cpp deleted file mode 100644 index 184f92910ae7e6d60574b0b207bd22a8f7076d8e..0000000000000000000000000000000000000000 --- a/lib/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.cpp +++ /dev/null @@ -1,1283 +0,0 @@ -//=== DFG2LLVM_PROMISE.cpp ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -#define ENABLE_ASSERTS - -#define DEBUG_TYPE "DFG2LLVM_PROMISE" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/IRReader/IRReader.h" -#include "llvm/Linker/Linker.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/IR/Attributes.h" -#include "llvm-c/Core.h" -#include "llvm/SupportVISC/VISCTimer.h" -#include "llvm/SupportVISC/DFG2LLVM.h" -#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h" -#include <sstream> -#include <fstream> - -using namespace llvm; -using namespace builddfg; -using namespace dfg2llvm; - -namespace { - -cl::opt<std::string> QuantizationInputsFilename( - "quantization-levels-filename", - cl::desc("<PROMISE quantization levels input file (path)>"), - cl::value_desc("filename"), - cl::Required); - -// Helper class declarations - -// State machine definition for pattern identification - -/* An assumption is made for the PROMISE simulator: * - * a leaf node will contain consequtive operations that will map to a * - * single PROMISE simulator call * - - * To alleviate that, the states that correspond to valid patterns * - * - (FullyConnectedLayer_(2,3,x, ConvilutionLayer_(2,3,4,x)) * - * can invoke codeGen when detecting the beginning of a new pattern, then * - * clear the collected IIs and Args, then go to initial and invoke its * - * transition. */ - -class AbstractState; - -class CodeGenStateMachine { -private: - Module *M; - Module *RtM; - - std::ifstream &qin; // Quantization levels input stream reference - std::vector<Value*> Args; - std::vector<IntrinsicInst*> IIs; - AbstractState *current; - -public: - CodeGenStateMachine(Module *, Module *, std::ifstream &); - - void setCurrent(AbstractState *s) { - current = s; - } - - void transition(IntrinsicInst *II); - - Module *getModule() { - return M; - } - - void getNextQuantizationLevel(float &ql) { - qin >> ql; - } - - void addArgument(Value *Arg) { - Args.push_back(Arg); - } - - void addIntrinsicInst(IntrinsicInst *II) { - IIs.push_back(II); - } - - IntrinsicInst *getIntrinsicInstAt(unsigned idx) { - return IIs[idx]; - } - - void codeGen(); - -}; - -class AbstractState { -public: - enum ID - { - INITIAL_STATE, - FULLY_CONNECTED_LAYER_1, - FULLY_CONNECTED_LAYER_2, - FULLY_CONNECTED_LAYER_3, - FULLY_CONNECTED_LAYER, - CONVOLUTION_LAYER_1, - CONVOLUTION_LAYER_2, - CONVOLUTION_LAYER_3, - CONVOLUTION_LAYER_4, - CONVOLUTION_LAYER, - NO_PATTERN, - }; - -protected: - enum ID StateID; - -public: - enum ID getStateID() { - return StateID; - } - - virtual void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) = 0; - virtual ~AbstractState() {} -}; - -class InitialState : public AbstractState { -public: - InitialState() { - StateID = ID::INITIAL_STATE; - DEBUG(errs() << "new InitialState\n"); - } - ~InitialState() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class FullyConnectedLayer_1 : public AbstractState { -public: - FullyConnectedLayer_1() { - StateID = ID::FULLY_CONNECTED_LAYER_1; - DEBUG(errs() << "new FullyConnectedLayer_1\n"); - } - ~FullyConnectedLayer_1() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class FullyConnectedLayer_2 : public AbstractState { -public: - FullyConnectedLayer_2() { - StateID = ID::FULLY_CONNECTED_LAYER_2; - DEBUG(errs() << "new FullyConnectedLayer_2\n"); - } - ~FullyConnectedLayer_2() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class FullyConnectedLayer_3 : public AbstractState { -public: - FullyConnectedLayer_3() { - StateID = ID::FULLY_CONNECTED_LAYER_3; - DEBUG(errs() << "new FullyConnectedLayer_3\n"); - } - ~FullyConnectedLayer_3() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class FullyConnectedLayer : public AbstractState { -public: - FullyConnectedLayer() { - StateID = ID::FULLY_CONNECTED_LAYER; - DEBUG(errs() << "new FullyConnectedLayer\n"); - } - ~FullyConnectedLayer() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class ConvolutionLayer_1 : public AbstractState { -public: - ConvolutionLayer_1() { - StateID = ID::CONVOLUTION_LAYER_1; - DEBUG(errs() << "new ConvolutionLayer_1\n"); - } - ~ConvolutionLayer_1() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class ConvolutionLayer_2 : public AbstractState { -public: - ConvolutionLayer_2() { - StateID = ID::CONVOLUTION_LAYER_2; - DEBUG(errs() << "new ConvolutionLayer_2\n"); - } - ~ConvolutionLayer_2() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class ConvolutionLayer_3 : public AbstractState { -public: - ConvolutionLayer_3() { - StateID = ID::CONVOLUTION_LAYER_3; - DEBUG(errs() << "new ConvolutionLayer_3\n"); - } - ~ConvolutionLayer_3() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class ConvolutionLayer_4 : public AbstractState { -public: - ConvolutionLayer_4() { - StateID = ID::CONVOLUTION_LAYER_4; - DEBUG(errs() << "new ConvolutionLayer_4\n"); - } - ~ConvolutionLayer_4() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class ConvolutionLayer : public AbstractState { -public: - ConvolutionLayer() { - StateID = ID::CONVOLUTION_LAYER; - DEBUG(errs() << "new ConvolutionLayer\n"); - } - ~ConvolutionLayer() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class NoPattern : public AbstractState { -public: - NoPattern() { - StateID = ID::NO_PATTERN; - DEBUG(errs() << "new NoPattern\n"); - } - ~NoPattern() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { - if (II) { // Not end of instruction stream - switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_convolution: - { - Mch->addIntrinsicInst(II); - Mch->addArgument(II->getOperand(0)); // conv input - - // Read quantization levels for input - float i_min, i_max; - Mch->getNextQuantizationLevel(i_min); - Mch->getNextQuantizationLevel(i_max); - errs() << "i_min: " << i_min << "\n"; - errs() << "i_max: " << i_max << "\n"; - - // Create associated arguments for the quantization levels - Constant *IminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) i_min); -// errs() << "IminC : " -// << dyn_cast<ConstantFP>(IminC)->getValueAPF().convertToFloat() -// << "\n"; - Mch->addArgument(IminC); - Constant *ImaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) i_max); - Mch->addArgument(ImaxC); - - Mch->addArgument(II->getOperand(1)); // conv kernel - - // Read quantization levels for filter - float w_min, w_max; - Mch->getNextQuantizationLevel(w_min); - Mch->getNextQuantizationLevel(w_max); - errs() << "w_min: " << w_min << "\n"; - errs() << "w_max: " << w_max << "\n"; - Constant *WminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) w_min); - Mch->addArgument(WminC); - Constant *WmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) w_max); - Mch->addArgument(WmaxC); - - Mch->setCurrent(new ConvolutionLayer_1()); - } - break; - case Intrinsic::visc_tensor_mul: - { - Mch->addIntrinsicInst(II); - Mch->addArgument(II->getOperand(0)); // 1st gemm input - - // Read quantization levels for input - float i_min, i_max; - Mch->getNextQuantizationLevel(i_min); - Mch->getNextQuantizationLevel(i_max); - errs() << "i_min: " << i_min << "\n"; - errs() << "i_max: " << i_max << "\n"; - - Constant *IminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) i_min); - Mch->addArgument(IminC); - Constant *ImaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) i_max); - Mch->addArgument(ImaxC); - - Mch->addArgument(II->getOperand(1)); // 2nd gemm input - - // Read quantization levels for weight - float w_min, w_max; - Mch->getNextQuantizationLevel(w_min); - Mch->getNextQuantizationLevel(w_max); - errs() << "w_min: " << w_min << "\n"; - errs() << "w_max: " << w_max << "\n"; - - Constant *WminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) w_min); - Mch->addArgument(WminC); - Constant *WmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) w_max); - Mch->addArgument(WmaxC); - - Mch->setCurrent(new FullyConnectedLayer_1()); - } - break; - default: // Other HPVM intrinsic - Mch->setCurrent(new NoPattern()); - break; - } - delete this; - } // else {} // No HPVM intrinsic received. Remain at initial -} - -void FullyConnectedLayer_1::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_add: - { - IntrinsicInst *MulII = Mch->getIntrinsicInstAt(0); - assert((MulII == II->getOperand(0)) && - "Output of mul must be used as 1st operand of add"); - Mch->addIntrinsicInst(II); - - Mch->addArgument(II->getOperand(1)); // bias - - // Read quantization levels for input - float b_min, b_max; - Mch->getNextQuantizationLevel(b_min); - Mch->getNextQuantizationLevel(b_max); - errs() << "b_min: " << b_min << "\n"; - errs() << "b_max: " << b_max << "\n"; - - Constant *BminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) b_min); - Mch->addArgument(BminC); - Constant *BmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) b_max); - Mch->addArgument(BmaxC); - - Mch->setCurrent(new FullyConnectedLayer_2()); - } - break; - default: - Mch->setCurrent(new NoPattern()); - break; - } - } else { - Mch->setCurrent(new NoPattern()); - } - delete this; -} - -void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_tanh: - { - // Type of activation : TanH - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new FullyConnectedLayer_3()); - } - break; - case Intrinsic::visc_tensor_relu: - { - // Type of activation : ReLU - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new FullyConnectedLayer_3()); - } - break; - case Intrinsic::visc_tensor_clipped_relu: - { - // Type of activation : Clipped ReLU - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new FullyConnectedLayer_3()); - } - break; - default: // No activation, but HPVM intrinsic - Mch->setCurrent(new NoPattern()); - break; - } - } else { // End of instruction stream - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new FullyConnectedLayer()); - } - delete this; -} - -void FullyConnectedLayer_3::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (!II) { // End of instruction stream - Mch->setCurrent(new FullyConnectedLayer()); - } else { - Mch->setCurrent(new NoPattern()); - } - delete this; -} - -void FullyConnectedLayer::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - Mch->setCurrent(new NoPattern()); - delete this; - } -} - -void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_add: - { - IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0); - assert((ConvII == II->getOperand(0)) && - "Output of conv must be used as 1st operand of add"); - Mch->addIntrinsicInst(II); - - Mch->addArgument(II->getOperand(1)); // bias - // Read quantization levels for bias - float b_min, b_max; - Mch->getNextQuantizationLevel(b_min); - Mch->getNextQuantizationLevel(b_max); - errs() << "b_min: " << b_min << "\n"; - errs() << "b_max: " << b_max << "\n"; - - Constant *BminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) b_min); - Mch->addArgument(BminC); - Constant *BmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) b_max); - Mch->addArgument(BmaxC); - - Mch->addArgument(ConvII->getOperand(2)); // 1st numeric arg of conv - Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv - Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv - Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv - - Mch->setCurrent(new ConvolutionLayer_2()); - } - break; - default: - Mch->setCurrent(new NoPattern()); - break; - } - } else { - // No addition - Mch->addArgument(ConstantPointerNull::get( - Type::getInt8PtrTy(Mch->getModule()->getContext()))); - // Still need to add the quantization constants - and remove them from file - float b_min, b_max; - Mch->getNextQuantizationLevel(b_min); - Mch->getNextQuantizationLevel(b_max); - errs() << "b_min: " << b_min << "\n"; - errs() << "b_max: " << b_max << "\n"; - Constant *BminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) b_min); - Mch->addArgument(BminC); - Constant *BmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) b_max); - Mch->addArgument(BmaxC); - - // Zero for all convolution numeric arguments FIXME??? - IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0); - Mch->addArgument(ConvII->getOperand(2)); // 1st numeric arg of conv - Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv - Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv - Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - - // No pooling - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // 0 for unused pool argument - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new ConvolutionLayer()); - } - delete this; -} - -void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_tanh: - { - // Type of activation : TanH -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new ConvolutionLayer_3()); - } - break; - case Intrinsic::visc_tensor_relu: - { - // Type of activation : ReLU -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new ConvolutionLayer_3()); - } - break; - case Intrinsic::visc_tensor_clipped_relu: - { - // Type of activation : Clipped ReLU -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new ConvolutionLayer_3()); - } - break; - case Intrinsic::visc_tensor_pool_max: - { - // pool max - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // poolSize - Mch->addArgument(II->getOperand(1)); - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - Mch->addIntrinsicInst(II); - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new ConvolutionLayer_4()); - } - break; - case Intrinsic::visc_tensor_pool_min: - { - // pool min FIXME: 2: supported? - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - // poolSize - Mch->addArgument(II->getOperand(1)); - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - Mch->addIntrinsicInst(II); - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new ConvolutionLayer_4()); - } - break; - case Intrinsic::visc_tensor_pool_mean: - { - // pool mean - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - // poolSize - Mch->addArgument(II->getOperand(1)); - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - Mch->addIntrinsicInst(II); - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new ConvolutionLayer_4()); - } - break; - default: // No activation, No pooling, but HPVM intrinsic - Mch->setCurrent(new NoPattern()); - break; - } - } else { // End of instruction stream - // No pooling - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // 0 for unused pool argument - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new ConvolutionLayer()); - } - delete this; -} - -void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_pool_max: - { - // pool max - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // poolSize - Mch->addArgument(II->getOperand(1)); - Mch->addIntrinsicInst(II); - - // Revisit last intrinsic, to add argument for activation operation - IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); - // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU - Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::visc_tensor_tanh) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::visc_tensor_relu) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::visc_tensor_clipped_relu - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - } - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new ConvolutionLayer_4()); - } - break; - case Intrinsic::visc_tensor_pool_min: - { - // pool min FIXME: 2: supported? - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - // poolSize - Mch->addArgument(II->getOperand(1)); - Mch->addIntrinsicInst(II); - - // Revisit last intrinsic, to add argument for activation operation - IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); - // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU - Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::visc_tensor_tanh) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::visc_tensor_relu) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::visc_tensor_clipped_relu - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - } - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new ConvolutionLayer_4()); - } - break; - case Intrinsic::visc_tensor_pool_mean: - { - // pool max - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - // poolSize - Mch->addArgument(II->getOperand(1)); - Mch->addIntrinsicInst(II); - - // Revisit last intrinsic, to add argument for activation operation - IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); - // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU - Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::visc_tensor_tanh) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::visc_tensor_relu) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::visc_tensor_clipped_relu - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - } - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new ConvolutionLayer_4()); - } - break; - default: // No pooling, but HPVM intrinsic - Mch->setCurrent(new NoPattern()); - break; - } - } else { // End of instruction stream - // No pooling - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // 0 for unused pool argument - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - - // Revisit last intrinsic, to add argument for activation operation - IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); - // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU - Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::visc_tensor_tanh) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::visc_tensor_relu) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::visc_tensor_clipped_relu - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - } - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new ConvolutionLayer()); - } - delete this; -} - -void ConvolutionLayer_4::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (!II) { // End of instruction stream - Mch->setCurrent(new ConvolutionLayer()); - } else { - Mch->setCurrent(new NoPattern()); - } - delete this; -} - -void ConvolutionLayer::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - Mch->setCurrent(new NoPattern()); - delete this; - } -} - -void NoPattern::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {} - -CodeGenStateMachine::CodeGenStateMachine(Module *_M, Module *_RtM, std::ifstream &_qin) : - M(_M), RtM(_RtM), qin(_qin) { - current = new InitialState(); -} - -void CodeGenStateMachine::transition(IntrinsicInst *II) { - current->transition(this, II); -} - -void CodeGenStateMachine::codeGen() { - - if ((current->getStateID() != AbstractState::ID::FULLY_CONNECTED_LAYER) && - (current->getStateID() != AbstractState::ID::CONVOLUTION_LAYER)) { - // Not a valid instruction sequence. - assert(false && "Unsupported instruction sequence by PROMISE simulator\n"); - } - - // We have a valid instruction sequence. - // Make sure that the instruction sequence can be traslated: - // each instruction's result must be used only by the next one in sequence. - for (unsigned p = 0; p < IIs.size()-1; p++) { - IntrinsicInst *II = IIs[p]; - assert((II->hasOneUse()) && - "Instruction sequence does not fit expected pattern: not single use\n"); - - Value::user_iterator ui = II->user_begin(); // The only use - assert((*ui == IIs[p+1]) && - "Instruction sequence does not fit expected pattern: not used by next instruction\n"); - } - - // Create corresponding PROMISE simulator call - CallInst *CI; - switch (current->getStateID()) { - case AbstractState::ID::CONVOLUTION_LAYER: - { - Constant* ConvLayer_PROMISE = - M->getOrInsertFunction(StringRef("ConvLayer_PROMISE"), - RtM->getFunction(StringRef("ConvLayer_PROMISE"))->getFunctionType()); - DEBUG(errs() << *ConvLayer_PROMISE); - - // FIXME: get last argument from some intrinsic. For now, 7 - Args.push_back(ConstantInt::get(Type::getInt32Ty(M->getContext()), 7)); - // Create PROMISE simulator function call - CI = CallInst::Create(ConvLayer_PROMISE, Args, ""); - } - break; - case AbstractState::ID::FULLY_CONNECTED_LAYER: - { - Constant* FCLayer_PROMISE = - M->getOrInsertFunction(StringRef("FCLayer_PROMISE"), - RtM->getFunction(StringRef("FCLayer_PROMISE"))->getFunctionType()); - DEBUG(errs() << *FCLayer_PROMISE); - - // FIXME: get last argument from some intrinsic. For now, 7 - Args.push_back(ConstantInt::get(Type::getInt32Ty(M->getContext()), 7)); - // Create PROMISE simulator function call - CI = CallInst::Create(FCLayer_PROMISE, Args, ""); - } - break; - default: - llvm_unreachable("Unexpected CodeGenStateMachine State\n"); - break; - } - - // Insert new call and replace all uses of pattern result with - // the PROMISE simulator call - IntrinsicInst *IIlast = *(IIs.rbegin()); - CI->insertBefore(IIlast); - IIlast->replaceAllUsesWith(CI); - - // Remove the instructions we translated to the simulator call. - // Traverse the vector backwards, otherwise definitions are deleted while - // their subsequent uses are still around. - for (std::vector<IntrinsicInst *>::reverse_iterator ri = IIs.rbegin(), - re = IIs.rend(); ri != re; ++ri) { - DEBUG(errs() << "Erasing: " << **ri << "\n"); - (*ri)->eraseFromParent(); - } -errs() << "****** GenF:\n" << *(CI->getParent()->getParent()); - -} - -// DFG2LLVM_PROMISE - The first implementation. - -struct DFG2LLVM_PROMISE : public DFG2LLVM { - static char ID; // Pass identification, replacement for typeid - DFG2LLVM_PROMISE() : DFG2LLVM(ID) {} -private: - -public: - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<BuildDFG>(); - AU.addPreserved<BuildDFG>(); - } - - bool runOnModule(Module &M); -}; - -// Visitor for Code generation traversal (tree traversal for now) -class CGT_PROMISE : public CodeGenTraversal { - -private: - //Member variables - std::ifstream qin; - - // VISC Runtime API and Tensor runtime API - Constant* llvm_hpvm_initTensorRt; - Constant* llvm_hpvm_cleanupTensorRt; - Constant* hpvm_request_tensor; - - // Functions - - // Virtual Functions - void init(); - void initRuntimeAPI(); - void codeGen(DFInternalNode* N); - void codeGen(DFLeafNode* N); - -public: - - // Constructor - CGT_PROMISE(Module &_M, BuildDFG &_DFG, std::string &_str) : CodeGenTraversal(_M, _DFG) { - qin.open(_str.c_str()); - assert(qin && "Failed to open quantization levels input file\n"); - initRuntimeAPI(); - } - - ~CGT_PROMISE() { - qin.close(); - } - -}; - -void CGT_PROMISE::init() { - // FIXME: what to do here? If anything? -} - -// Initialize the VISC runtime API. This makes it easier to insert these calls -void CGT_PROMISE::initRuntimeAPI() { - - // Load Runtime API Module - SMDiagnostic Err; - - char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT"); - assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!\n"); - - // FIXME: set correct path - Twine llvmSrcRoot = LLVM_SRC_ROOT; - Twine runtimeAPI = llvmSrcRoot+"/projects/hpvm-tensor-rt/lib/tensor_runtime.ll"; - runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext()); - if(runtimeModule == nullptr) - DEBUG(errs() << Err.getMessage()); - else - DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n"); - - // Get or insert Global declarations for - // - initialization - // - cleanup - // - request a tensor - DECLARE(llvm_hpvm_initTensorRt); - DECLARE(llvm_hpvm_cleanupTensorRt); - DECLARE(hpvm_request_tensor); - - // Find visc.init and visc.cleanup calls, and add placeholder methods - // for initialization and cleanup of the hpvm tensor runtime - - Function* VI = M.getFunction("llvm.visc.init"); - assert(VI->getNumUses() == 1 && "__visc__init should only be used once\n"); - InitCall = cast<Instruction>(*VI->user_begin()); - CallInst::Create(llvm_hpvm_initTensorRt, - ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)), - "", InitCall); - - Function* VC = M.getFunction("llvm.visc.cleanup"); - assert(VC->getNumUses() == 1 && "__visc__clear should only be used once\n"); - CleanupCall = cast<Instruction>(*VC->user_begin()); - CallInst::Create(llvm_hpvm_cleanupTensorRt, ArrayRef<Value*>(), "", CleanupCall); - -} - -void CGT_PROMISE::codeGen(DFInternalNode* N) { - errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n"; - errs () << "Skipping internal node\n"; -} - -void CGT_PROMISE::codeGen(DFLeafNode* N) { - - // Skip code generation if it is a dummy node - if(N->isDummyNode()) { - DEBUG(errs() << "Skipping dummy node\n"); - return; - } - - // Abort code generation if it is an allocation node - if(N->isAllocationNode()) { - assert(false && "Allocation Node not expected in ApproxHPVM"); - return; - } - - // Generate code only if it has the right hint - if (!checkPreferredTarget(N, visc::PROMISE_TARGET)) { - errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n"; - return; - } - - // Get the function associated with the dataflow node - Function *F = N->getFuncPointer(); -errs() << "Node Function: " << *F << "\n"; - // Look up if we have visited this function before. If we have, then just - // get the cloned function pointer from DFNode. Otherwise, create the cloned - // function and add it to the DFNode GenFunc. - Function *F_promise = N->getGenFuncForTarget(visc::PROMISE_TARGET); - - assert((F_promise == NULL) && - "Error: Visiting a node for which code already generated"); - - // Clone the function - ValueToValueMapTy VMap; - std::string FName(F->getName().data());//Twine FName = F->getName(); - F_promise = CloneFunction(F, VMap); - F_promise->setName(FName+"_promise"); - F_promise->removeFromParent(); - M.getFunctionList().push_back(F_promise); - - N->addGenFunc(F_promise, visc::PROMISE_TARGET, true); - - /* Removing HPVM in/out/inout function attributes */ - for(Function::arg_iterator ai = F_promise->arg_begin(), ae = F_promise->arg_end(); - ai != ae; ai++){ - Argument *Arg = &*ai; - if(Arg->hasAttribute(Attribute::In)) - Arg->removeAttr(Attribute::In); - if(Arg->hasAttribute(Attribute::Out)) - Arg->removeAttr(Attribute::Out); - if(Arg->hasAttribute(Attribute::InOut)) - Arg->removeAttr(Attribute::InOut); - } - - // Adding nounwind to generated function : FIXME: needed? - DEBUG(errs() << "Adding nounwind to generated function\n"); - F_promise->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind); - - // Add llvm_visc_requestTensor calls for every pointer argument of the function - // (they are all expected to be tensors), at the beginning of the function. - // This is the first instruction of the function, insert them before this - Instruction* FI = &*(F_promise->getEntryBlock().begin()); - - // FIXME: verify that we want 0 as a target device - // In this backend, the target device is CPU, represented by i32 0. - ConstantInt *TargetDeviceID = - ConstantInt::get(Type::getInt32Ty(M.getContext()), 0); - - for (Function::arg_iterator ai = F_promise->arg_begin(), - ae = F_promise->arg_end(); ai != ae; ++ai) { - Argument* Arg = &*ai; - if (Arg->getType()->isPointerTy()) { - Value *Args[] = {Arg, TargetDeviceID}; - CallInst::Create(hpvm_request_tensor, - ArrayRef<Value*>(Args, 2), - "", FI); - } - } - - CodeGenStateMachine CGM(&M, runtimeModule.get(), qin); - - /* An assumption is made for the PROMISE simulator: * - * a leaf node will contain consequtive operations that will map to a * - * single PROMISE simulator call */ - - for (inst_iterator i = inst_begin(F_promise), e = inst_end(F_promise); - i != e; ++i) { - Instruction *I = &(*i); - CGM.transition(dyn_cast<IntrinsicInst>(I)); - } - - CGM.codeGen(); - -//errs() << "-----------------------------------\n"; -//errs() << *F_promise << "\n"; - - return; -} - -bool DFG2LLVM_PROMISE::runOnModule(Module &M) { - errs() << "\nDFG2LLVM_PROMISE PASS\n"; - - errs() << QuantizationInputsFilename << "\n"; - -// std::ifstream qin(quantizationInputsFilename_cstr); -// std::ifstream qin; -// qin.open(QuantizationInputsFilename.c_str()); -// qin.open(QuantizationInputsFilename.c_str(), std::ifstream::in); - - // Get the BuildDFG Analysis Results: - // - Dataflow graph - BuildDFG &DFG = getAnalysis<BuildDFG>(); - - std::vector<DFInternalNode*> Roots = DFG.getRoots(); - - // Visitor for Code Generation Graph Traversal - CGT_PROMISE *CGTVisitor = new CGT_PROMISE(M, DFG, QuantizationInputsFilename); - - // Iterate over all the DFGs and produce code for each one of them - for (auto rootNode: Roots) { - // Initiate code generation for root DFNode - CGTVisitor->visit(rootNode); - } - - //TODO: Edit module epilogue to remove the VISC intrinsic declarations - delete CGTVisitor; - - return true; -} - - -/****************************************************************************** - * Helper functions * - ******************************************************************************/ - -} // End of namespace - -char DFG2LLVM_PROMISE::ID = 0; -static RegisterPass<DFG2LLVM_PROMISE> X("dfg2llvm-promise", - "Dataflow Graph to LLVM for PROMISE Pass", - false /* does not modify the CFG */, - true /* transformation, * - * not just analysis */); - diff --git a/lib/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.exports b/lib/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.exports deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/lib/DFG2LLVM_PROMISE/LLVMBuild.txt b/lib/DFG2LLVM_PROMISE/LLVMBuild.txt deleted file mode 100644 index 714ad14f18c136998d25cda12e44e28ad191028e..0000000000000000000000000000000000000000 --- a/lib/DFG2LLVM_PROMISE/LLVMBuild.txt +++ /dev/null @@ -1,21 +0,0 @@ -;===- ./lib/Transforms/DFG2LLVM_NVPTX/LLVMBuild.txt ------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = DFG2LLVM_PROMISE -parent = Transforms diff --git a/lib/DFG2LLVM_SPIR/CMakeLists.txt b/lib/DFG2LLVM_SPIR/CMakeLists.txt deleted file mode 100644 index 43e2254c7930d8f36142338412ec0a1b87f789ab..0000000000000000000000000000000000000000 --- a/lib/DFG2LLVM_SPIR/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -if(WIN32 OR CYGWIN) - set(LLVM_LINK_COMPONENTS Core Support) -endif() - -add_llvm_loadable_module( LLVMDFG2LLVM_SPIR - DFG2LLVM_SPIR.cpp - - DEPENDS - intrinsics_gen - PLUGIN_TOOL - opt - ) diff --git a/lib/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp b/lib/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp deleted file mode 100644 index 48b1492047d5d98a9018f9cff2d94f7450cb490b..0000000000000000000000000000000000000000 --- a/lib/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp +++ /dev/null @@ -1,2010 +0,0 @@ -//=== DFG2LLVM_SPIR.cpp ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define ENABLE_ASSERTS -#define TARGET_PTX 32 -#define GENERIC_ADDRSPACE 0 -#define GLOBAL_ADDRSPACE 1 -#define SHARED_ADDRSPACE 3 - -#define DEBUG_TYPE "DFG2LLVM_SPIR" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/IR/PassManager.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/IRReader/IRReader.h" -#include "llvm/Linker/Linker.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/Bitcode/BitcodeWriter.h" -#include "llvm/IR/Attributes.h" -#include "llvm/SupportVISC/VISCHint.h" -#include "llvm/SupportVISC/VISCTimer.h" -#include "llvm/SupportVISC/DFG2LLVM.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm-c/Core.h" - -#include "llvm/SupportVISC/VISCUtils.h" -#include "llvm/IR/IRPrintingPasses.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Support/ToolOutputFile.h" -#include "llvm/IR/UseListOrder.h" - -#include <sstream> - -using namespace llvm; -using namespace builddfg; -using namespace dfg2llvm; -using namespace viscUtils; - -// VISC Command line option to use timer or not -static cl::opt<bool> -VISCTimer_SPIR("visc-timers-spir", cl::desc("Enable visc timers")); - -namespace { -// Helper class declarations - -// Class to maintain the tuple of host pointer, device pointer and size -// in bytes. Would have preferred to use tuple but support not yet available -class OutputPtr { -public: - OutputPtr(Value* _h_ptr, Value* _d_ptr, Value* _bytes) - : h_ptr(_h_ptr), d_ptr(_d_ptr), bytes(_bytes) {} - - Value* h_ptr; - Value* d_ptr; - Value* bytes; -}; - -// Class to maintain important kernel info required for generating runtime -// calls -class Kernel { -public: - Kernel(Function* _KF, DFLeafNode* _KLeafNode, std::map<unsigned, unsigned> _inArgMap = - std::map<unsigned, unsigned>(), - std::map<unsigned, std::pair<Value*, unsigned> > _sharedInArgMap = - std::map<unsigned, std::pair<Value*, unsigned> >(), - std::vector<unsigned> _outArgMap = std::vector<unsigned>(), - unsigned _gridDim = 0, std::vector<Value*> _globalWGSize = std::vector<Value*>(), - unsigned _blockDim = 0, std::vector<Value*> _localWGSize = std::vector<Value*>()) - : KernelFunction(_KF), KernelLeafNode(_KLeafNode), inArgMap(_inArgMap), - sharedInArgMap(_sharedInArgMap), outArgMap(_outArgMap), gridDim(_gridDim), - globalWGSize(_globalWGSize), blockDim(_blockDim), localWGSize(_localWGSize) { - - assert(gridDim == globalWGSize.size() - && "gridDim should be same as the size of vector globalWGSize"); - assert(blockDim == localWGSize.size() - && "blockDim should be same as the size of vector localWGSize"); - } - - Function* KernelFunction; - DFLeafNode* KernelLeafNode; - std::map<unsigned, unsigned> inArgMap; - // Map for shared memory arguments - std::map<unsigned, std::pair<Value*, unsigned> > sharedInArgMap; - // Fields for (potential) allocation node - DFLeafNode* AllocationNode; - Function* AllocationFunction; - std::map<unsigned, unsigned> allocInArgMap; - - std::vector<unsigned> outArgMap; - unsigned gridDim; - std::vector<Value*> globalWGSize; - unsigned blockDim; - std::vector<Value*> localWGSize; - std::vector<int> localDimMap; - - std::map<unsigned, unsigned> getInArgMap() { - return inArgMap; - } - void setInArgMap(std::map<unsigned, unsigned> map) { - inArgMap = map; - } - - std::map<unsigned, std::pair<Value*, unsigned> > getSharedInArgMap() { - return sharedInArgMap; - } - void setSharedInArgMap(std::map<unsigned, std::pair<Value*, unsigned> > map) { - sharedInArgMap = map; - } - - std::vector<unsigned> getOutArgMap() { - return outArgMap; - } - void setOutArgMap(std::vector<unsigned> map) { - outArgMap = map; - } - - void setLocalWGSize(std::vector<Value*> V) { - localWGSize = V; - } - - bool hasLocalWG() { - return blockDim != 0; - } -}; - -// Helper function declarations -static void getExecuteNodeParams(Module &M, Value* &, Value* &, Value* &, Kernel*, - ValueToValueMapTy&, Instruction*); -static Value* genWorkGroupPtr(Module &M, std::vector<Value*>, ValueToValueMapTy&, - Instruction*, const Twine& WGName = "WGSize"); -static std::string getSPIRFilename(const Module&); -static std::string getFilenameFromModule(const Module& M); -static void changeDataLayout(Module &); -static void changeTargetTriple(Module &); -static std::string printType(Type*); -static StringRef getMangledName(std::string); -static StringRef getAtomicMangledName(std::string, unsigned, bool); -static void findReturnInst(Function *, std::vector<ReturnInst *> &); -static void findIntrinsicInst(Function *, Intrinsic::ID, std::vector<IntrinsicInst *> &); -static StringRef getAtomicOpName(Intrinsic::ID, unsigned); -static std::string getMathFunctionName(Intrinsic::ID); - -// DFG2LLVM_SPIR - The first implementation. -struct DFG2LLVM_SPIR : public DFG2LLVM { - static char ID; // Pass identification, replacement for typeid - DFG2LLVM_SPIR() : DFG2LLVM(ID) {} - -private: - -public: - bool runOnModule(Module &M); -}; - -// Visitor for Code generation traversal (tree traversal for now) -class CGT_SPIR : public CodeGenTraversal { - -private: - //Member variables - std::unique_ptr<Module> KernelM; - DFNode* KernelLaunchNode = nullptr; - Kernel* kernel; - - // VISC Runtime API - Constant* llvm_visc_ocl_launch; - Constant* llvm_visc_ocl_wait; - Constant* llvm_visc_ocl_initContext; - Constant* llvm_visc_ocl_clearContext; - Constant* llvm_visc_ocl_argument_shared; - Constant* llvm_visc_ocl_argument_scalar; - Constant* llvm_visc_ocl_argument_ptr; - Constant* llvm_visc_ocl_output_ptr; - Constant* llvm_visc_ocl_free; - Constant* llvm_visc_ocl_getOutput; - Constant* llvm_visc_ocl_executeNode; - - //Functions - std::string getKernelsModuleName(Module &M); - void fixValueAddrspace(Value* V, unsigned addrspace); - Function* changeArgAddrspace(Function* F, std::vector<unsigned> &Args, unsigned i); - void removeAttributeAtArguments(Function* F, std::vector<unsigned> &Ags, Attribute::AttrKind attrKind); - void addCLMetadata(Function* F); - Function* transformFunctionToVoid(Function* F); - void removeInOutAttributes(Function* F); - void insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& FileName); - - // Virtual Functions - void init() { - VISCTimer = VISCTimer_SPIR; - TargetName = "SPIR"; - } - void initRuntimeAPI(); - void codeGen(DFInternalNode* N); - void codeGen(DFLeafNode* N); - -public: - - // Constructor - CGT_SPIR(Module &_M, BuildDFG &_DFG) : CodeGenTraversal(_M, _DFG), KernelM(CloneModule(&_M)) { - KernelLaunchNode = NULL; - init(); - initRuntimeAPI(); - errs() << "Old module pointer: " << &_M << "\n"; - errs() << "New module pointer: " << KernelM.get() << "\n"; - // Copying instead of creating new, in order to preserve required info (metadata) - // Remove functions, global variables and aliases - std::vector<GlobalVariable*> gvv = std::vector<GlobalVariable*>(); - for (Module::global_iterator mi = KernelM->global_begin(), - me = KernelM->global_end(); (mi != me); ++mi) { - GlobalVariable* gv = &*mi; - gvv.push_back(gv); - } - for (std::vector<GlobalVariable*>::iterator vi = gvv.begin(); vi != gvv.end(); ++vi) { - (*vi)->replaceAllUsesWith(UndefValue::get((*vi)->getType())); - (*vi)->eraseFromParent(); - } - - std::vector<Function*> fv = std::vector<Function*>(); - for (Module::iterator mi = KernelM->begin(), - me = KernelM->end(); (mi != me); ++mi) { - Function* f = &*mi; - fv.push_back(f); - } - for (std::vector<Function*>::iterator vi = fv.begin(); vi != fv.end(); ++vi) { - (*vi)->replaceAllUsesWith(UndefValue::get((*vi)->getType())); - (*vi)->eraseFromParent(); - } - - std::vector<GlobalAlias*> av = std::vector<GlobalAlias*>(); - for (Module::alias_iterator mi = KernelM->alias_begin(), - me = KernelM->alias_end(); (mi != me); ++mi) { - GlobalAlias* a = &*mi; - av.push_back(a); - } - for (std::vector<GlobalAlias*>::iterator vi = av.begin(); vi != av.end(); ++vi) { - (*vi)->replaceAllUsesWith(UndefValue::get((*vi)->getType())); - (*vi)->eraseFromParent(); - } - - changeDataLayout(*KernelM); - changeTargetTriple(*KernelM); - - DEBUG(errs() << *KernelM); - - } - - void removeLLVMIntrinsics(); - void writeKernelsModule(); -}; - -// Initialize the VISC runtime API. This makes it easier to insert these calls -void CGT_SPIR::initRuntimeAPI() { - - // Load Runtime API Module - SMDiagnostic Err; - - char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT"); - assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!"); - - Twine llvmSrcRoot = LLVM_SRC_ROOT; - Twine runtimeAPI = llvmSrcRoot+"/../build/projects/visc-rt/visc-rt.ll"; - errs() << "Open file: " << runtimeAPI.str() << "\n"; - runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext()); - if(runtimeModule == NULL) - DEBUG(errs() << Err.getMessage()); - else - errs() << "Successfully loaded visc-rt API module\n"; - - // Get or insert the global declarations for launch/wait functions - DECLARE(llvm_visc_ocl_launch); - DECLARE(llvm_visc_ocl_wait); - DECLARE(llvm_visc_ocl_initContext); - DECLARE(llvm_visc_ocl_clearContext); - DECLARE(llvm_visc_ocl_argument_shared); - DECLARE(llvm_visc_ocl_argument_scalar); - DECLARE(llvm_visc_ocl_argument_ptr); - DECLARE(llvm_visc_ocl_output_ptr); - DECLARE(llvm_visc_ocl_free); - DECLARE(llvm_visc_ocl_getOutput); - DECLARE(llvm_visc_ocl_executeNode); - - // Get or insert timerAPI functions as well if you plan to use timers - initTimerAPI(); - - // Insert init context in main - DEBUG(errs() << "Gen Code to initialize SPIR Timer\n"); - Function* VI = M.getFunction("llvm.visc.init"); - assert(VI->getNumUses() == 1 && "__visc__init should only be used once"); - - InitCall = cast<Instruction>(*VI->user_begin()); - initializeTimerSet(InitCall); - switchToTimer(visc_TimerID_INIT_CTX, InitCall); - CallInst::Create(llvm_visc_ocl_initContext, - ArrayRef<Value*>(getTargetID(M, visc::SPIR_TARGET)), - "", InitCall); - switchToTimer(visc_TimerID_NONE, InitCall); - - // Insert print instruction at visc exit - DEBUG(errs() << "Gen Code to print SPIR Timer\n"); - Function* VC = M.getFunction("llvm.visc.cleanup"); - DEBUG(errs() << *VC << "\n"); - assert(VC->getNumUses() == 1 && "__visc__clear should only be used once"); - - CleanupCall = cast<Instruction>(*VC->user_begin()); - printTimerSet(CleanupCall); - - -} - -// Generate Code to call the kernel -// The plan is to replace the internal node with a leaf node. This method is -// used to generate a function to associate with this leaf node. The function -// is responsible for all the memory allocation/transfer and invoking the -// kernel call on the device -void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& FileName) { - // Check if clone already exists. If it does, it means we have visited this - // function before. -// assert(N->getGenFunc() == NULL && "Code already generated for this node"); - - assert(N->getGenFuncForTarget(visc::SPIR_TARGET) == NULL && - "Code already generated for this node"); - - // Useful values - Value* True = ConstantInt::get(Type::getInt1Ty(M.getContext()), 1); - Value* False = ConstantInt::get(Type::getInt1Ty(M.getContext()), 0); - - // If kernel struct has not been initialized with kernel function, then fail - assert(K != NULL && "No kernel found!!"); - - DEBUG(errs() << "Generating kernel call code\n"); - - Function* F = N->getFuncPointer(); - - - // Create of clone of F with no instructions. Only the type is the same as F - // without the extra arguments. - Function* F_X86; - - // Clone the function, if we are seeing this function for the first time. We - // only need a clone in terms of type. - ValueToValueMapTy VMap; - - // Create new function with the same type - F_X86 = Function::Create(F->getFunctionType(), F->getLinkage(), F->getName(), &M); - - // Loop over the arguments, copying the names of arguments over. - Function::arg_iterator dest_iterator = F_X86->arg_begin(); - for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(); - i != e; ++i) { - dest_iterator->setName(i->getName()); // Copy the name over... - // Increment dest iterator - ++dest_iterator; - } - - // Add a basic block to this empty function - BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F_X86); - ReturnInst* RI = ReturnInst::Create(M.getContext(), - UndefValue::get(F_X86->getReturnType()), BB); - - // FIXME: Adding Index and Dim arguments are probably not required except - // for consistency purpose (DFG2LLVM_X86 does assume that all leaf nodes do - // have those arguments) - - // Add Index and Dim arguments except for the root node - if(!N->isRoot() && !N->getParent()->isChildGraphStreaming()) - F_X86 = addIdxDimArgs(F_X86); - - BB = &*F_X86->begin(); - RI = cast<ReturnInst>(BB->getTerminator()); - - //Add the generated function info to DFNode -// N->setGenFunc(F_X86, visc::CPU_TARGET); - N->addGenFunc(F_X86, visc::SPIR_TARGET, true); - - // Loop over the arguments, to create the VMap - dest_iterator = F_X86->arg_begin(); - for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(); - i != e; ++i) { - // Add mapping to VMap and increment dest iterator - VMap[&*i] = &*dest_iterator; - ++dest_iterator; - } - - /* TODO: Use this code to verufy if this is a good pattern for OCL kernel - - // Sort children in topological order before code generation for kernel call - N->getChildGraph()->sortChildren(); - - // The DFNode N has the property that it has only one child (leaving Entry - // and Exit dummy nodes). This child is the OCL kernel. This simplifies code - // generation for kernel calls significantly. All the inputs to this child - // node would either be constants or from the parent node N. - - assert(N->getChildGraph()->size() == 3 - && "Node expected to have just one non-dummy node!"); - - DFNode* C; - for(DFGraph::children_iterator ci = N->getChildGraph()->begin(), - ce = N->getChildGraph()->end(); ci != ce; ++ci) { - C = *ci; - // Skip dummy node call - if (!C->isDummyNode()) - break; - } - - assert(C->isDummyNode() == false && "Internal Node only contains dummy nodes!"); - - Function* CF = C->getFuncPointer(); - */ - Function* KF = K->KernelLeafNode->getFuncPointer(); - // Initialize context - //DEBUG(errs() << "Initializing context" << "\n"); - //CallInst::Create(llvm_visc_ocl_initContext, None, "", RI); - - DEBUG(errs() << "Initializing commandQ" << "\n"); - // Initialize command queue - switchToTimer(visc_TimerID_SETUP, InitCall); - Value* fileStr = getStringPointer(FileName, InitCall, "Filename"); - DEBUG(errs() << "Kernel Filename constant: " << *fileStr << "\n"); - DEBUG(errs() << "Generating code for kernel - " << K->KernelFunction->getName()<< "\n"); - Value* kernelStr = getStringPointer(K->KernelFunction->getName(), InitCall,"KernelName"); - - Value* LaunchInstArgs[] = {fileStr, kernelStr}; - - DEBUG(errs() << "Inserting launch call" << "\n"); - CallInst* SPIR_Ctx = CallInst::Create(llvm_visc_ocl_launch, - ArrayRef<Value*>(LaunchInstArgs, 2), - "graph"+KF->getName(), - InitCall); - DEBUG(errs() << *SPIR_Ctx << "\n"); - GraphIDAddr = new GlobalVariable(M, - SPIR_Ctx->getType(), - false, - GlobalValue::CommonLinkage, - Constant::getNullValue(SPIR_Ctx->getType()), - "graph"+KF->getName()+".addr"); - DEBUG(errs() << "Store at: " << *GraphIDAddr << "\n"); - StoreInst* SI = new StoreInst(SPIR_Ctx, GraphIDAddr, InitCall); - DEBUG(errs() << *SI << "\n"); - switchToTimer(visc_TimerID_NONE, InitCall); - switchToTimer(visc_TimerID_SETUP, RI); - Value* GraphID = new LoadInst(GraphIDAddr, "graph."+KF->getName(), RI); - - // Iterate over the required input edges of the node and use the visc-rt API - // to set inputs - DEBUG(errs() << "Iterate over input edges of node and insert visc api\n"); - std::vector<OutputPtr> OutputPointers; - // Vector to hold the device memory object that need to be cleared before we release - // context - std::vector<Value*> DevicePointers; - - std::map<unsigned, unsigned> kernelInArgMap = K->getInArgMap(); -/* - for(unsigned i=0; i<KF->getFunctionType()->getNumParams(); i++) { - - // The kernel object gives us the mapping of arguments from kernel launch - // node function (F_X86) to kernel (kernel->KF) - Value* inputVal = getArgumentAt(F_X86, K->getInArgMap()[i]); - -*/ - for(std::map<unsigned, unsigned>::iterator ib = kernelInArgMap.begin(), - ie = kernelInArgMap.end(); ib != ie; ++ib) { - unsigned i = ib->first; - Value* inputVal = getArgumentAt(F_X86, ib->second); - DEBUG(errs() << "\tArgument "<< i<< " = " << *inputVal << "\n"); - - // input value has been obtained. - // Check if input is a scalar value or a pointer operand - // For scalar values such as int, float, etc. the size is simply the size of - // type on target machine, but for pointers, the size of data would be the - // next integer argument - if(inputVal->getType()->isPointerTy()) { - - switchToTimer(visc_TimerID_COPY_PTR, RI); - // Pointer Input - // CheckAttribute - Value* isOutput = (hasAttribute(KF, i, Attribute::Out))? True : False; - Value* isInput = ((hasAttribute(KF, i, Attribute::Out)) - && !(hasAttribute(KF, i, Attribute::In)))? False : True; - - Argument* A = getArgumentAt(KF, i); - if(isOutput == True) { - DEBUG(errs() << *A << " is an OUTPUT argument\n"); - } - if(isInput == True) { - DEBUG(errs() << *A << " is an INPUT argument\n"); - } - - - Value* inputValI8Ptr = CastInst::CreatePointerCast(inputVal, - Type::getInt8PtrTy(M.getContext()), - inputVal->getName()+".i8ptr", - RI); - - // Assert that the pointer argument size (next argument) is in the map - assert(kernelInArgMap.find(i+1) != kernelInArgMap.end()); - - Value* inputSize = getArgumentAt(F_X86, kernelInArgMap[i+1]); - - assert(inputSize->getType() == Type::getInt64Ty(M.getContext()) - && "Pointer type input must always be followed by size (integer type)"); - Value* setInputArgs[] = {GraphID, - inputValI8Ptr, - ConstantInt::get(Type::getInt32Ty(M.getContext()),i), - inputSize, - isInput, - isOutput - }; - Value* d_ptr = CallInst::Create(llvm_visc_ocl_argument_ptr, - ArrayRef<Value*>(setInputArgs, 6), "", RI); - DevicePointers.push_back(d_ptr); - // If this has out attribute, store the returned device pointer in - // memory to read device memory later - if(isOutput == True) OutputPointers.push_back(OutputPtr(inputValI8Ptr, d_ptr, inputSize)); - } - else { - switchToTimer(visc_TimerID_COPY_SCALAR, RI); - // Scalar Input - // Store the scalar value on stack and then pass the pointer to its - // location - AllocaInst* inputValPtr = new AllocaInst(inputVal->getType(), inputVal->getName()+".ptr", RI); - StoreInst* SI = new StoreInst(inputVal, inputValPtr, RI); - - Value* inputValI8Ptr = CastInst::CreatePointerCast(inputValPtr, - Type::getInt8PtrTy(M.getContext()), - inputVal->getName()+".i8ptr", - RI); - - Value* setInputArgs[] = {GraphID, - inputValI8Ptr, - ConstantInt::get(Type::getInt32Ty(M.getContext()),i), - ConstantExpr::getSizeOf(inputVal->getType()) - }; - CallInst::Create(llvm_visc_ocl_argument_scalar, - ArrayRef<Value*>(setInputArgs, 4), "", RI); - } - } - - DEBUG(errs() << "Setup shared memory arguments of node and insert visc api\n"); - - // Check to see if all the allocation sizes are constant (determined - // statically) - bool constSizes = true; - for (auto& e: K->getSharedInArgMap()) { - constSizes &= isa<Constant>(e.second.first); - } - - // If the sizes are all constant - if (constSizes) { - for (auto& e: K->getSharedInArgMap()) { - unsigned argNum = e.first; - Value* allocSize = e.second.first; - - DEBUG(errs() << "\tLocal Memory at "<< argNum << ", size = " << *allocSize << "\n"); - - if (KF->getFunctionType()->getParamType(argNum)->isPointerTy()) { - // Shared memory ptr argument - scalar at size position - switchToTimer(visc_TimerID_COPY_SCALAR, RI); - - assert(isa<Constant>(allocSize) && "Constant shared memory size is expected"); - - Value* setInputArgs[] = {GraphID, - ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum), - allocSize - }; - CallInst::Create(llvm_visc_ocl_argument_shared, - ArrayRef<Value*>(setInputArgs, 3), "", RI); - } - else { - // Sharem memory size argument - scalar at address position - switchToTimer(visc_TimerID_COPY_SCALAR, RI); - // Store the scalar value on stack and then pass the pointer to its - // location - AllocaInst* allocSizePtr = new AllocaInst(allocSize->getType(), - allocSize->getName()+".sharedMem.ptr", RI); - StoreInst* SI = new StoreInst(allocSize, allocSizePtr, RI); - - Value* allocSizeI8Ptr = CastInst::CreatePointerCast(allocSizePtr, - Type::getInt8PtrTy(M.getContext()), - allocSize->getName()+".sharedMem.i8ptr", - RI); - - Value* setInputArgs[] = {GraphID, - allocSizeI8Ptr, - ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum), - ConstantExpr::getSizeOf(allocSize->getType()) - }; - CallInst::Create(llvm_visc_ocl_argument_scalar, - ArrayRef<Value*>(setInputArgs, 4), "", RI); - } - } - } else { - - Function *F_alloc = K->AllocationFunction; - StructType *FAllocRetTy = dyn_cast<StructType>(F_alloc->getReturnType()); - assert(FAllocRetTy && "Allocation node with no struct return type"); - - std::vector<Value *> AllocInputArgs; - for (unsigned i = 0; i < K->allocInArgMap.size(); i++) { - AllocInputArgs.push_back(getArgumentAt(F_X86, K->allocInArgMap.at(i))); - } - - CallInst *CI = CallInst::Create(F_alloc, AllocInputArgs, "", RI); - std::vector<ExtractValueInst *> ExtractValueInstVec; - for (unsigned i = 1; i < FAllocRetTy->getNumElements(); i += 2) { - ExtractValueInst *EI = ExtractValueInst::Create(CI, i, "", RI); - ExtractValueInstVec.push_back(EI); - } - - for (auto& e: K->getSharedInArgMap()) { - unsigned argNum = e.first; - Value* allocSize = ExtractValueInstVec[e.second.second/2]; - - DEBUG(errs() << "\tLocal Memory at "<< argNum << ", size = " << *allocSize << "\n"); - - if (KF->getFunctionType()->getParamType(argNum)->isPointerTy()) { - // Shared memory ptr argument - scalar at size position - switchToTimer(visc_TimerID_COPY_SCALAR, RI); - - Value* setInputArgs[] = {GraphID, - ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum), - allocSize - }; - CallInst::Create(llvm_visc_ocl_argument_shared, - ArrayRef<Value*>(setInputArgs, 3), "", RI); - } - else { - // Sharem memory size argument - scalar at address position - switchToTimer(visc_TimerID_COPY_SCALAR, RI); - // Store the scalar value on stack and then pass the pointer to its - // location - AllocaInst* allocSizePtr = new AllocaInst(allocSize->getType(), - allocSize->getName()+".sharedMem.ptr", RI); - StoreInst* SI = new StoreInst(allocSize, allocSizePtr, RI); - - Value* allocSizeI8Ptr = CastInst::CreatePointerCast(allocSizePtr, - Type::getInt8PtrTy(M.getContext()), - allocSize->getName()+".sharedMem.i8ptr", - RI); - - Value* setInputArgs[] = {GraphID, - allocSizeI8Ptr, - ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum), - ConstantExpr::getSizeOf(allocSize->getType()) - }; - CallInst::Create(llvm_visc_ocl_argument_scalar, - ArrayRef<Value*>(setInputArgs, 4), "", RI); - } - } - } - - - DEBUG(errs() << "Setup output edges of node and insert visc api\n"); - - // Set output if struct is not an empty struct - StructType* OutputTy = K->KernelLeafNode->getOutputType(); - std::vector<Value*> d_Outputs; - if(!OutputTy->isEmptyTy()) { - switchToTimer(visc_TimerID_COPY_PTR, RI); - // Not an empty struct - // Iterate over all elements of the struct and put them in - for(unsigned i=0; i < OutputTy->getNumElements(); i++) { - unsigned outputIndex = KF->getFunctionType()->getNumParams()+i; - Value* setOutputArgs[] = {GraphID, - ConstantInt::get(Type::getInt32Ty(M.getContext()),outputIndex), - ConstantExpr::getSizeOf(OutputTy->getElementType(i))}; - - CallInst* d_Output = CallInst::Create(llvm_visc_ocl_output_ptr, - ArrayRef<Value*>(setOutputArgs, 3), - "d_output."+KF->getName(), - RI); - d_Outputs.push_back(d_Output); - } - } - - // Enqueue kernel - // Need work dim, localworksize, globalworksize - // Allocate size_t[numDims] space on stack. Store the work group sizes and - // pass it as an argument to ExecNode - - switchToTimer(visc_TimerID_MISC, RI); - Value *workDim, *LocalWGPtr, *GlobalWGPtr; - getExecuteNodeParams(M, workDim, LocalWGPtr, GlobalWGPtr, K, VMap, RI); - switchToTimer(visc_TimerID_KERNEL, RI); - Value* ExecNodeArgs[] = {GraphID, - workDim, - LocalWGPtr, - GlobalWGPtr - }; - CallInst* Event = CallInst::Create(llvm_visc_ocl_executeNode, - ArrayRef<Value*>(ExecNodeArgs, 4), - "event."+KF->getName(), - RI); - DEBUG(errs() << "Execute Node Call: " << *Event << "\n"); - - // Wait for Kernel to Finish - CallInst::Create(llvm_visc_ocl_wait, - ArrayRef<Value*>(GraphID), - "", - RI); - - switchToTimer(visc_TimerID_READ_OUTPUT, RI); - // Read Output Struct if not empty - if(!OutputTy->isEmptyTy()) { - std::vector<Value*>h_Outputs; - Value* KernelOutput = UndefValue::get(OutputTy); - for(unsigned i=0; i < OutputTy->getNumElements(); i++) { - Value* GetOutputArgs[] = {GraphID, - Constant::getNullValue(Type::getInt8PtrTy(M.getContext())), - d_Outputs[i], - ConstantExpr::getSizeOf(OutputTy->getElementType(i)) - }; - CallInst* h_Output = CallInst::Create(llvm_visc_ocl_getOutput, - ArrayRef<Value*>(GetOutputArgs, 4), - "h_output."+KF->getName()+".addr", - RI); - // Read each device pointer listed in output struct - // Load the output struct - CastInst* BI = BitCastInst::CreatePointerCast(h_Output, - OutputTy->getElementType(i)->getPointerTo(), "output.ptr", RI); - - Value* OutputElement = new LoadInst(BI, "output."+KF->getName(), RI); - KernelOutput = InsertValueInst::Create(KernelOutput, OutputElement, ArrayRef<unsigned>(i), - KF->getName()+"output", RI); - } - OutputMap[K->KernelLeafNode] = KernelOutput; - } - - // Read all the pointer arguments which had side effects i.e., had out - // attribute - DEBUG(errs() << "Output Pointers : " << OutputPointers.size() << "\n"); - // FIXME: Not reading output pointers anymore as we read them when data is - // actually requested - /*for(auto output: OutputPointers) { - DEBUG(errs() << "Read: " << *output.d_ptr << "\n"); - DEBUG(errs() << "\tTo: " << *output.h_ptr << "\n"); - DEBUG(errs() << "\t#bytes: " << *output.bytes << "\n"); - - Value* GetOutputArgs[] = {GraphID, output.h_ptr, output.d_ptr, output.bytes}; - CallInst* CI = CallInst::Create(llvm_visc_ocl_getOutput, - ArrayRef<Value*>(GetOutputArgs, 4), - "", RI); - }*/ - switchToTimer(visc_TimerID_MEM_FREE, RI); - // Clear Context and free device memory - DEBUG(errs() << "Clearing context" << "\n"); - // Free Device Memory - for(auto d_ptr: DevicePointers) { - CallInst::Create(llvm_visc_ocl_free, ArrayRef<Value*>(d_ptr), "", RI); - } - switchToTimer(visc_TimerID_CLEAR_CTX, CleanupCall); - // Clear Context - LoadInst* LI = new LoadInst(GraphIDAddr, "", CleanupCall); - CallInst::Create(llvm_visc_ocl_clearContext, ArrayRef<Value*>(LI), "", CleanupCall); - switchToTimer(visc_TimerID_NONE, CleanupCall); - - switchToTimer(visc_TimerID_MISC, RI); - DEBUG(errs() << "*** Generating epilogue code for the function****\n"); - // Generate code for output bindings - // Get Exit node - DFNode* C = N->getChildGraph()->getExit(); - // Get OutputType of this node - StructType* OutTy = N->getOutputType(); - Value *retVal = UndefValue::get(F_X86->getReturnType()); - // Find the kernel's output arg map, to use instead of the bindings - std::vector<unsigned> outArgMap = kernel->getOutArgMap(); - // Find all the input edges to exit node - for (unsigned i=0; i < OutTy->getNumElements(); i++) { - DEBUG(errs() << "Output Edge " << i << "\n"); - // Find the incoming edge at the requested input port - DFEdge* E = C->getInDFEdgeAt(i); - - assert(E && "No Binding for output element!"); - // Find the Source DFNode associated with the incoming edge - DFNode* SrcDF = E->getSourceDF(); - - DEBUG(errs() << "Edge source -- " << SrcDF->getFuncPointer()->getName() << "\n"); - - // If Source DFNode is a dummyNode, edge is from parent. Get the - // argument from argument list of this internal node - Value* inputVal; - if(SrcDF->isEntryNode()) { - inputVal = getArgumentAt(F_X86, i); - DEBUG(errs() << "Argument "<< i<< " = " << *inputVal << "\n"); - } - else { - // edge is from a internal node - // Check - code should already be generated for this source dfnode - // FIXME: Since the 2-level kernel code gen has aspecific structure, we - // can assume the SrcDF is same as Kernel Leaf node. - // Use outArgMap to get correct mapping - SrcDF = K->KernelLeafNode; - assert(OutputMap.count(SrcDF) - && "Source node call not found. Dependency violation!"); - - // Find Output Value associated with the Source DFNode using OutputMap - Value* CI = OutputMap[SrcDF]; - - // Extract element at source position from this call instruction - std::vector<unsigned> IndexList; - // i is the destination of DFEdge E - // Use the mapping instead of the bindings -// IndexList.push_back(E->getSourcePosition()); - IndexList.push_back(outArgMap[i]); - DEBUG(errs() << "Going to generate ExtarctVal inst from "<< *CI <<"\n"); - ExtractValueInst* EI = ExtractValueInst::Create(CI, IndexList, - "",RI); - inputVal = EI; - } - std::vector<unsigned> IdxList; - IdxList.push_back(i); - retVal = InsertValueInst::Create(retVal, inputVal, IdxList, "", RI); - } - - DEBUG(errs() << "Extracted all\n"); - switchToTimer(visc_TimerID_NONE, RI); - retVal->setName("output"); - ReturnInst* newRI = ReturnInst::Create(F_X86->getContext(), retVal); - ReplaceInstWithInst(RI, newRI); -} - - -// Right now, only targeting the one level case. In general, device functions -// can return values so we don't need to change them -void CGT_SPIR::codeGen(DFInternalNode* N) { - errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n"; - if(KernelLaunchNode == NULL) - errs () << "No kernel launch node\n"; - else { - errs () << "KernelLaunchNode: " << KernelLaunchNode->getFuncPointer()->getName() << "\n"; - } - - - if (!KernelLaunchNode) { - DEBUG(errs() << "No code generated (host code for kernel launch complete).\n"); - return; - } - - if (N == KernelLaunchNode) { - DEBUG(errs() << "Found kernel launch node. Generating host code.\n"); - //TODO - - // Now the remaining nodes to be visited should be ignored - KernelLaunchNode = NULL; - DEBUG(errs() << "Insert Runtime calls\n"); - insertRuntimeCalls(N, kernel, getSPIRFilename(M)); - - } else { - DEBUG(errs() << "Found intermediate node. Getting size parameters.\n"); - // Keep track of the arguments order. - std::map<unsigned, unsigned> inmap1 = N->getInArgMap(); - std::map<unsigned, unsigned> inmap2 = kernel->getInArgMap(); - // TODO: Structure assumed: one thread node, one allocation node (at most), - // TB node - std::map<unsigned, unsigned> inmapFinal; - for (std::map<unsigned, unsigned>::iterator ib = inmap2.begin(), ie = inmap2.end(); - ib != ie; ++ib) { - inmapFinal[ib->first] = inmap1[ib->second]; - } - - kernel->setInArgMap(inmapFinal); - - // Keep track of the output arguments order. - std::vector<unsigned> outmap1 = N->getOutArgMap(); - std::vector<unsigned> outmap2 = kernel->getOutArgMap(); - - // TODO: Change when we have incoming edges to the dummy exit node from more - // than one nodes. In this case, the number of bindings is the same, but - // their destination position, thus the index in outmap1, is not - // 0 ... outmap2.size()-1 - // The limit is the size of outmap2, because this is the number of kernel - // output arguments for which the mapping matters - // For now, it reasonable to assume that all the kernel arguments are returned, - // maybe plys some others from other nodes, thus outmap2.size() <= outmap1.size() - for (unsigned i = 0; i < outmap2.size(); i++) { - outmap1[i] = outmap2[outmap1[i]]; - } - kernel->setOutArgMap(outmap1); - - // Track the source of local dimlimits for the kernel - // Dimension limit can either be a constant or an argument of parent - // function. Since Internal node would no longer exist, we need to insert the - // localWGSize with values from the parent of N. - std::vector<Value*> localWGSizeMapped; - for (unsigned i = 0; i < kernel->localWGSize.size(); i++) { - if (isa<Constant>(kernel->localWGSize[i])) { - // if constant, use as it is - localWGSizeMapped.push_back(kernel->localWGSize[i]); - } - else if (Argument* Arg = dyn_cast<Argument>(kernel->localWGSize[i])) { - // if argument, find the argument location in N. Use InArgMap of N to - // find the source location in Parent of N. Retrieve the argument from - // parent to insert in the vector. - unsigned argNum = Arg->getArgNo(); - // This argument will be coming from the parent node, not the allocation - // Node - assert(N->getInArgMap().find(argNum) != N->getInArgMap().end()); - - unsigned parentArgNum = N->getInArgMap()[argNum]; - Argument* A = getArgumentAt(N->getParent()->getFuncPointer(), parentArgNum); - localWGSizeMapped.push_back(A); - } - else { - assert(false && "LocalWGsize using value which is neither argument nor constant!"); - } - } - // Update localWGSize vector of kernel - kernel->setLocalWGSize(localWGSizeMapped); - } - -} - -//static bool checkPreferredTarget(DFNode* N, visc::Target T) { - //Function* F = N->getFuncPointer(); - //Module* M = F->getParent(); - //NamedMDNode* HintNode; - //switch (T) { - //case visc::GPU_TARGET: - //HintNode = M->getOrInsertNamedMetadata("visc_hint_gpu"); - //break; - //case visc::SPIR_TARGET: - //HintNode = M->getOrInsertNamedMetadata("visc_hint_spir"); - //break; - //case visc::CPU_TARGET: - //HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu"); - //break; - //default: - //llvm_unreachable("Target Not supported yet!"); - //} - //for (unsigned i = 0; i < HintNode->getNumOperands(); i++) { - //MDNode* MetaNode = HintNode->getOperand(i); - //if(F == MetaNode->getOperand(0)) - //return true; - //} - //return false; -//} - -void CGT_SPIR::codeGen(DFLeafNode* N) { - - // Skip code generation if it is a dummy node - if(N->isDummyNode()) { - DEBUG(errs() << "Skipping dummy node\n"); - return; - } - - // Skip code generation if it is an allocation node - if(N->isAllocationNode()) { - DEBUG(errs() << "Skipping allocation node\n"); - return; - } - - // Generate code only if it has the right hint -// if(!checkPreferredTarget(N, visc::SPIR_TARGET)) { -// errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n"; -// return; -// } - if(!preferredTargetIncludes(N, visc::SPIR_TARGET)) { - errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n"; - return; - } - - // Checking which node is the kernel launch - DFNode* PNode = N->getParent(); - int pLevel = PNode->getLevel(); - int pReplFactor = PNode->getNumOfDim(); - - // Choose parent node as kernel launch if: - // (1) Parent is the top level node i.e., Root of DFG - // OR - // (2) Parent does not have multiple instances - errs() << "pLevel = " << pLevel << "\n"; - errs() << "pReplFactor = " << pReplFactor << "\n"; - - if (!pLevel || !pReplFactor) { - errs() << "*************** Kernel Gen: 1-Level Hierarchy **************\n"; - KernelLaunchNode = PNode; - errs() << "Setting Kernel Launch Node\n"; - kernel = new Kernel(NULL, - N, - N->getInArgMap(), - N->getSharedInArgMap(), - N->getOutArgMap(), - N->getNumOfDim(), - N->getDimLimits()); - } - else { - // Converting a 2-level DFG to opencl kernel - errs() << "*************** Kernel Gen: 2-Level Hierarchy **************\n"; - KernelLaunchNode = PNode->getParent(); - assert((PNode->getNumOfDim() == N->getNumOfDim()) && "Dimension number must match"); - // Contains the instructions generating the kernel configuration parameters - kernel = new Kernel(NULL, // kernel function - N, // kernel leaf node - N->getInArgMap(), // kenel argument mapping - N->getSharedInArgMap(), - N->getOutArgMap(), // kernel output mapping from the leaf to the interemediate node - PNode->getNumOfDim(), // gridDim - PNode->getDimLimits(),// grid size - N->getNumOfDim(), // blockDim - N->getDimLimits()); // block size - - } - - std::vector<IntrinsicInst *> IItoRemove; - BuildDFG::HandleToDFNode Leaf_HandleToDFNodeMap; - - // Get the function associated with the dataflow node - Function *F = N->getFuncPointer(); - - // Look up if we have visited this function before. If we have, then just - // get the cloned function pointer from DFNode. Otherwise, create the cloned - // function and add it to the DFNode GenFunc. - Function *F_spir = N->getGenFuncForTarget(visc::SPIR_TARGET); - assert(F_spir == NULL && "Error: Visiting a node for which code already generated"); - - // Clone the function - ValueToValueMapTy VMap; - - Twine FName = F->getName(); - F_spir = CloneFunction(F, VMap); - F_spir->setName(FName+"_spir"); - errs() << "Old Function Name: " << F->getName() << "\n"; - errs() << "New Function Name: " << F_spir->getName() << "\n"; - - F_spir->removeFromParent(); - - // Insert the cloned function into the kernels module - KernelM->getFunctionList().push_back(F_spir); - - //TODO: Iterate over all the instructions of F_spir and identify the - //callees and clone them into this module. - DEBUG(errs() << *F_spir->getType()); - DEBUG(errs() << *F_spir); - - //Add generated function info to DFNode - //N->setGenFunc(F_spir, visc::SPIR_TARGET); - - F_spir = transformFunctionToVoid(F_spir); - - // Add generated function info to DFNode - //N->setGenFunc(F_spir, visc::SPIR_TARGET); - - removeInOutAttributes(F_spir); - - //Add generated function info to DFNode - N->addGenFunc(F_spir, visc::SPIR_TARGET, false); - - DEBUG(errs() << "Removing all attributes from Kernel Function and adding nounwind\n"); - F_spir->removeAttributes(AttributeSet::FunctionIndex, F_spir->getAttributes().getFnAttributes()); - F_spir->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind); - - - //FIXME: For now, assume only one allocation node - kernel->AllocationNode = NULL; - - for (DFNode::const_indfedge_iterator ieb = N->indfedge_begin(), iee = N->indfedge_end(); - ieb != iee; ++ieb) { - DFNode *SrcDFNode = (*ieb)->getSourceDF(); - DEBUG(errs() << "Found edge from node: " << " " << SrcDFNode->getFuncPointer()->getName() << "\n"); - DEBUG(errs() << "Current Node: " << N->getFuncPointer()->getName() << "\n"); - DEBUG(errs() << "isAllocationNode = "<< SrcDFNode->isAllocationNode() << "\n"); - if (!SrcDFNode->isDummyNode()) { - assert(SrcDFNode->isAllocationNode()); - kernel->AllocationNode = dyn_cast<DFLeafNode>(SrcDFNode); - kernel->allocInArgMap = SrcDFNode->getInArgMap(); - break; - } - } - - // Vector for shared memory arguments - std::vector<unsigned> SharedMemArgs; - - // If no allocation node was found, SharedMemArgs is empty - if (kernel->AllocationNode) { - - ValueToValueMapTy VMap; - Function *F_alloc = CloneFunction(kernel->AllocationNode->getFuncPointer(), VMap); - //F_alloc->removeFromParent(); - // Insert the cloned function into the kernels module - //M.getFunctionList().push_back(F_alloc); - - std::vector<IntrinsicInst *> ViscMallocInstVec; - findIntrinsicInst(F_alloc, Intrinsic::visc_malloc, ViscMallocInstVec); - - for (unsigned i = 0; i < ViscMallocInstVec.size(); i++) { - IntrinsicInst *II = ViscMallocInstVec[i]; - assert(II->hasOneUse() && "visc_malloc result is used more than once"); - II->replaceAllUsesWith(ConstantPointerNull::get(Type::getInt8PtrTy(M.getContext()))); - II->eraseFromParent(); - } - kernel->AllocationFunction = F_alloc; - - // This could be used to check that the allocation node has the appropriate - // number of fields in its return struct -/* - ReturnInst *RI = ReturnInstVec[0]; - Value *RetVal = RI->getReturnValue(); - Type *RetTy = RetVal->getType(); - StructType *RetStructTy = dyn_cast<StructType>(RetTy); - assert(RetStructTy && "Allocation node does not return a struct type"); - unsigned numFields = RetStructTy->getNumElements(); -*/ - std::map<unsigned, std::pair<Value*, unsigned> > sharedInMap = kernel->getSharedInArgMap(); - AllocationNodeProperty* APN = - (AllocationNodeProperty*) kernel->AllocationNode->getProperty(DFNode::Allocation); - for (auto& AllocPair: APN->getAllocationList()) { - unsigned destPos = AllocPair.first->getDestPosition(); - unsigned srcPos = AllocPair.first->getSourcePosition(); - SharedMemArgs.push_back(destPos); - sharedInMap[destPos] = std::pair<Value *, unsigned>(AllocPair.second, srcPos+1); - sharedInMap[destPos+1] = std::pair<Value *, unsigned>(AllocPair.second, srcPos+1); - } - kernel->setSharedInArgMap(sharedInMap); - } - std::sort(SharedMemArgs.begin(), SharedMemArgs.end()); - - // All pointer args which are not shared memory pointers have to be moved to - // global address space - unsigned argIndex = 0; - std::vector<unsigned> GlobalMemArgs; - for(auto& Arg: F_spir->getArgumentList()) { - if (Arg.getType()->isPointerTy()) { - // If the arguement is already chosen for shared memory arguemnt list, skip. - // Else put it in Global memory arguement list - if(std::count(SharedMemArgs.begin(), SharedMemArgs.end(), argIndex) == 0) { - GlobalMemArgs.push_back(argIndex); - } - } - argIndex++; - } - std::sort(GlobalMemArgs.begin(), GlobalMemArgs.end()); - - /* At this point, we assume that chescks for the fact that SharedMemArgs only - contains pointer arguments to GLOBAL_ADDRSPACE have been performed by the - analysis pass */ - - F_spir = changeArgAddrspace(F_spir, SharedMemArgs, SHARED_ADDRSPACE); - removeAttributeAtArguments(F_spir, SharedMemArgs, Attribute::NoCapture); - F_spir = changeArgAddrspace(F_spir, GlobalMemArgs, GLOBAL_ADDRSPACE); - - - // Go through all the instructions - for (inst_iterator i = inst_begin(F_spir), e = inst_end(F_spir); i != e; ++i) { - Instruction *I = &(*i); - // Leaf nodes should not contain VISC graph intrinsics or launch - assert(!BuildDFG::isViscLaunchIntrinsic(I) && "Launch intrinsic within a dataflow graph!"); - assert(!BuildDFG::isViscGraphIntrinsic(I) && "VISC graph intrinsic within a leaf dataflow node!"); - - if (BuildDFG::isViscIntrinsic(I)) { - IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); - IntrinsicInst* ArgII; - DFNode* ArgDFNode; - - /************************ Handle VISC Query intrinsics ************************/ - - switch (II->getIntrinsicID()) { - /**************************** llvm.visc.getNode() *****************************/ - case Intrinsic::visc_getNode: { - DEBUG(errs() << F_spir->getName() << "\t: Handling getNode\n"); - // add mapping <intrinsic, this node> to the node-specific map - Leaf_HandleToDFNodeMap[II] = N; - IItoRemove.push_back(II); - } - break; - /************************* llvm.visc.getParentNode() **************************/ - case Intrinsic::visc_getParentNode: { - DEBUG(errs() << F_spir->getName() << "\t: Handling getParentNode\n"); - // get the parent node of the arg node - // get argument node - ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts()); - ArgDFNode = Leaf_HandleToDFNodeMap[ArgII]; - // get the parent node of the arg node - // Add mapping <intrinsic, parent node> to the node-specific map - // the argument node must have been added to the map, orelse the - // code could not refer to it - Leaf_HandleToDFNodeMap[II] = ArgDFNode->getParent(); - - IItoRemove.push_back(II); - } - break; - /*************************** llvm.visc.getNumDims() ***************************/ - case Intrinsic::visc_getNumDims: { - DEBUG(errs() << F_spir->getName() << "\t: Handling getNumDims\n"); - // get node from map - // get the appropriate field - ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts()); - ArgDFNode = Leaf_HandleToDFNodeMap[ArgII]; - int numOfDim = ArgDFNode->getNumOfDim(); - DEBUG(errs() << "\t Got node dimension : " << numOfDim << "\n"); - IntegerType* IntTy = Type::getInt32Ty(KernelM->getContext()); - ConstantInt* numOfDimConstant = ConstantInt::getSigned(IntTy, (int64_t) numOfDim); - - // Replace the result of the intrinsic with the computed value - II->replaceAllUsesWith(numOfDimConstant); - - IItoRemove.push_back(II); - } - break; - /*********************** llvm.visc.getNodeInstanceID() ************************/ - case Intrinsic::visc_getNodeInstanceID_x: - case Intrinsic::visc_getNodeInstanceID_y: - case Intrinsic::visc_getNodeInstanceID_z: { - DEBUG(errs() << F_spir->getName() << "\t: Handling getNodeInstanceID\n"); - ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts()); - ArgDFNode = Leaf_HandleToDFNodeMap[ArgII]; - assert(ArgDFNode && "Arg node is NULL"); - // A leaf node always has a parent - DFNode* ParentDFNode = ArgDFNode->getParent(); - assert(ParentDFNode && "Parent node of a leaf is NULL"); - - // Get the number associated with the required dimension - // FIXME: The order is important! - // These three intrinsics need to be consecutive x,y,z - uint64_t dim = II->getIntrinsicID() - - Intrinsic::visc_getNodeInstanceID_x; - assert((dim >= 0) && (dim < 3) && "Invalid dimension argument"); - DEBUG(errs() << "\t dimension = " << dim << "\n"); - - // Argument of the function to be called - ConstantInt * DimConstant = - ConstantInt::get(Type::getInt32Ty(KernelM->getContext()), dim); - //ArrayRef<Value *> Args(DimConstant); - - // The following is to find which function to call - Function * OpenCLFunction; - int parentLevel = N->getParent()->getLevel(); - int parentReplFactor = N->getParent()->getNumOfDim(); - DEBUG(errs() << "Parent Level = " << parentLevel << "\n"); - DEBUG(errs() << "Parent Repl factor = " << parentReplFactor << "\n"); - - FunctionType* FT = - FunctionType::get(Type::getInt64Ty(KernelM->getContext()), - ArrayRef<Type*>(Type::getInt32Ty(KernelM->getContext())), - false); - - if ((!parentLevel || !parentReplFactor) && ArgDFNode == N) { - // We only have one level in the hierarchy or the parent node is not - // replicated. This indicates that the parent node is the kernel - // launch, so we need to specify a global id. - // We can translate this only if the argument is the current node - // itself - DEBUG(errs() << "Substitute with get_global_id()\n"); - DEBUG(errs() << *II << "\n"); - OpenCLFunction = cast<Function> - (KernelM->getOrInsertFunction(getMangledName("get_global_id"), FT)); - } else if (Leaf_HandleToDFNodeMap[ArgII] == N) { - // We are asking for this node's id with respect to its parent - // this is a local id call - OpenCLFunction = cast<Function> - (KernelM->getOrInsertFunction(getMangledName("get_local_id"), FT)); - } else if (Leaf_HandleToDFNodeMap[ArgII] == N->getParent()) { - // We are asking for this node's parent's id with respect to its - // parent: this is a group id call - OpenCLFunction = cast<Function> - (KernelM->getOrInsertFunction(getMangledName("get_group_id"), FT)); - } else { - errs() << N->getFuncPointer()->getName() << "\n"; - errs() << N->getParent()->getFuncPointer()->getName() << "\n"; - errs() << *II << "\n"; - - assert(false && "Unable to translate getNodeInstanceID intrinsic"); - } - - // Create call instruction, insert it before the intrinsic and truncate - // the output to 32 bits and replace all the uses of the previous - // instruction with the new one - CallInst* CI = CallInst::Create(OpenCLFunction, DimConstant, "", II); - II->replaceAllUsesWith(CI); - - IItoRemove.push_back(II); - } - break; - /********************** llvm.visc.getNumNodeInstances() ***********************/ - case Intrinsic::visc_getNumNodeInstances_x: - case Intrinsic::visc_getNumNodeInstances_y: - case Intrinsic::visc_getNumNodeInstances_z: { -//TODO: think about whether this is the best way to go -// there are hw specific registers. therefore it is good to have the intrinsic -// but then, why do we need to keep that info in the graph? -// (only for the kernel configuration during the call) - - DEBUG(errs() << F_spir->getName() << "\t: Handling getNumNodeInstances\n"); - ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts()); - ArgDFNode = Leaf_HandleToDFNodeMap[ArgII]; - // A leaf node always has a parent - DFNode* ParentDFNode = ArgDFNode->getParent(); - assert(ParentDFNode && "Parent node of a leaf is NULL"); - - // Get the number associated with the required dimension - // FIXME: The order is important! - // These three intrinsics need to be consecutive x,y,z - uint64_t dim = II->getIntrinsicID() - - Intrinsic::visc_getNumNodeInstances_x; - assert((dim >= 0) && (dim < 3) && "Invalid dimension argument"); - DEBUG(errs() << "\t dimension = " << dim << "\n"); - - // Argument of the function to be called - ConstantInt * DimConstant = - ConstantInt::get(Type::getInt32Ty(KernelM->getContext()), dim); - //ArrayRef<Value *> Args(DimConstant); - - // The following is to find which function to call - Function * OpenCLFunction; - int parentLevel = ParentDFNode->getLevel(); - int parentReplFactor = ParentDFNode->getNumOfDim(); - - FunctionType* FT = - FunctionType::get(Type::getInt64Ty(KernelM->getContext()), - Type::getInt32Ty(KernelM->getContext()), - false); - if ((N == ArgDFNode) && (!parentLevel || !parentReplFactor)) { - // We only have one level in the hierarchy or the parent node is not - // replicated. This indicates that the parent node is the kernel - // launch, so the instances are global_size (gridDim x blockDim) - OpenCLFunction = cast<Function> - (KernelM->getOrInsertFunction(getMangledName("get_global_size"), FT)); - } else if (Leaf_HandleToDFNodeMap[ArgII] == N) { - // We are asking for this node's instances - // this is a local size (block dim) call - OpenCLFunction = cast<Function> - (KernelM->getOrInsertFunction(getMangledName("get_local_size"), FT)); - } else if (Leaf_HandleToDFNodeMap[ArgII] == N->getParent()) { - // We are asking for this node's parent's instances - // this is a (global_size/local_size) (grid dim) call - OpenCLFunction = cast<Function> - (KernelM->getOrInsertFunction(getMangledName("get_num_groups"), FT)); - } else { - assert(false && "Unable to translate getNumNodeInstances intrinsic"); - } - - // Create call instruction, insert it before the intrinsic and truncate - // the output to 32 bits and replace all the uses of the previous - // instruction with the new one - CallInst* CI = CallInst::Create(OpenCLFunction, DimConstant, "", II); - II->replaceAllUsesWith(CI); - - IItoRemove.push_back(II); - } - break; - case Intrinsic::visc_barrier: - { - DEBUG(errs() << F_spir->getName() << "\t: Handling barrier\n"); - DEBUG(errs() << "Substitute with barrier()\n"); - DEBUG(errs() << *II << "\n"); - FunctionType* FT = FunctionType::get(Type::getVoidTy(KernelM->getContext()), - std::vector<Type*>(1, Type::getInt32Ty(KernelM->getContext())), - false); - Function* OpenCLFunction = cast<Function> - (KernelM->getOrInsertFunction(getMangledName("barrier"), FT)); - CallInst* CI = CallInst::Create(OpenCLFunction, - ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(KernelM->getContext()), 1)), - "", II); - II->replaceAllUsesWith(CI); - IItoRemove.push_back(II); - } - break; - case Intrinsic::visc_atomic_cmpxchg: - case Intrinsic::visc_atomic_add: - case Intrinsic::visc_atomic_sub: - case Intrinsic::visc_atomic_xchg: - case Intrinsic::visc_atomic_min: - case Intrinsic::visc_atomic_umin: - case Intrinsic::visc_atomic_max: - case Intrinsic::visc_atomic_umax: - case Intrinsic::visc_atomic_and: - case Intrinsic::visc_atomic_or: - case Intrinsic::visc_atomic_xor: - case Intrinsic::visc_atomic_inc: - case Intrinsic::visc_atomic_dec: - { - DEBUG(errs() << *II << "\n"); - // Only have support for i32 atomic intrinsics - assert(II->getType() == Type::getInt32Ty(II->getContext()) - && "Only support i32 atomic intrinsics for now"); - // Substitute with appropriate atomic builtin - assert(II->getNumArgOperands() == 2 && "Expecting 2 operands for these atomics"); - - Value* Ptr = II->getArgOperand(0); - Value* Val = II->getArgOperand(1); - assert(Ptr->getType()->isPointerTy() - && "First argument of supported atomics is expected to be a pointer"); - PointerType* PtrTy = cast<PointerType>(Ptr->getType()); - if(PtrTy != Type::getInt32PtrTy(II->getContext(), PtrTy->getAddressSpace())) { - Ptr = CastInst::CreatePointerCast(Ptr, - Type::getInt32PtrTy(II->getContext(), - PtrTy->getAddressSpace()), "", II); - } - - StringRef name = getAtomicOpName(II->getIntrinsicID(), PtrTy->getAddressSpace()); - - Type* paramTypes[] = { Type::getInt32PtrTy(II->getContext(), PtrTy->getAddressSpace()), - Type::getInt32Ty(KernelM->getContext()) - }; - FunctionType* AtomicFT = FunctionType::get(II->getType(), - ArrayRef<Type*>(paramTypes, 2), - false); - Function* AtomicFunction = cast<Function> - (KernelM->getOrInsertFunction(name, AtomicFT)); - Value* atomicArgs[] = { Ptr, Val }; - CallInst* AtomicInst = CallInst::Create(AtomicFunction, - ArrayRef<Value*>(atomicArgs, 2), - "", II); - - DEBUG(errs() << "Substitute with: " << *AtomicInst << "\n"); - II->replaceAllUsesWith(AtomicInst); - IItoRemove.push_back(II); - } - break; - default: - assert(false && "Unknown VISC Intrinsic!"); - break; - } - - } - else if(CallInst* CI = dyn_cast<CallInst>(I)) { - DEBUG(errs() << "Found a call: " << *CI << "\n"); - Function* calleeF = cast<Function>(CI->getCalledValue()->stripPointerCasts()); - if(calleeF->isDeclaration()) { - // Add the declaration to kernel module - DEBUG(errs() << "Adding declaration to Kernel module: " << *calleeF << "\n"); - KernelM->getOrInsertFunction(calleeF->getName(), calleeF->getFunctionType()); - if(IntrinsicInst* II = dyn_cast<IntrinsicInst>(CI)) { - // Now handle a few specific intrinsics - // For now, sin and cos are translated to their libclc equivalent - switch(II->getIntrinsicID()) { - case Intrinsic::sin: - case Intrinsic::cos: - case Intrinsic::sqrt: - case Intrinsic::floor: - case Intrinsic::nvvm_rsqrt_approx_f: - { - DEBUG(errs() << "Found math function: " << *II << "\n"); - // Get the builtin function - // SPIR uses mangled name for builtin math functions - assert(II->getType()->isFloatTy() - && "Only handling sin(float) and cos(float)!"); - std::string name = getMathFunctionName(II->getIntrinsicID()); - - FunctionType* MathFT = FunctionType::get(II->getType(), - Type::getFloatTy(KernelM->getContext()), - false); - Function* MathFunction = cast<Function> - (KernelM->getOrInsertFunction(name, MathFT)); - CallInst* CI = CallInst::Create(MathFunction, II->getArgOperand(0), II->getName(), II); - - II->replaceAllUsesWith(CI); - IItoRemove.push_back(II); - break; - } - default: - DEBUG(errs() << "[WARNING] Found Intrinsic: " << *II << "\n" ); - } - } - - } - else { - // Clone the function - ValueToValueMapTy VMap; - Function* newCalleeF = CloneFunction(calleeF, VMap); - newCalleeF->removeFromParent(); //TODO: MARIA check - KernelM->getFunctionList().push_back(newCalleeF); - } - //TODO: how to handle address space qualifiers in load/store - } - - } - - // We need to do this explicitly: DCE pass will not remove them because we - // have assumed theworst memory behaviour for these function calls - // Traverse the vector backwards, otherwise definitions are deleted while - // their subsequent uses are still around - for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(), - re = IItoRemove.rend(); ri != re; ++ri) - (*ri)->eraseFromParent(); - - addCLMetadata(F_spir); - kernel->KernelFunction = F_spir; - errs() << "Identified kernel - " << kernel->KernelFunction->getName() << "\n"; - DEBUG(errs() << *KernelM); - - return; -} - -bool DFG2LLVM_SPIR::runOnModule(Module &M) { - errs() << "\nDFG2LLVM_SPIR PASS\n"; - - // Get the BuildDFG Analysis Results: - // - Dataflow graph - // - Maps from i8* hansles to DFNode and DFEdge - BuildDFG &DFG = getAnalysis<BuildDFG>(); - - // DFInternalNode *Root = DFG.getRoot(); - std::vector<DFInternalNode*> Roots = DFG.getRoots(); - // BuildDFG::HandleToDFNode &HandleToDFNodeMap = DFG.getHandleToDFNodeMap(); - // BuildDFG::HandleToDFEdge &HandleToDFEdgeMap = DFG.getHandleToDFEdgeMap(); - - // Visitor for Code Generation Graph Traversal - CGT_SPIR *CGTVisitor = new CGT_SPIR(M, DFG); - - // Iterate over all the DFGs and produce code for each one of them - for (auto rootNode: Roots) { - // Initiate code generation for root DFNode - CGTVisitor->visit(rootNode); - } - - // This is not required. Itrinsics that do not have a use are not a problem - //CGTVisitor->removeLLVMIntrinsics(); - CGTVisitor->writeKernelsModule(); - - //TODO: Edit module epilogue to remove the VISC intrinsic declarations - delete CGTVisitor; - - return true; -} - -std::string CGT_SPIR::getKernelsModuleName(Module &M) { - /*SmallString<128> currentDir; - llvm::sys::fs::current_path(currentDir); - std::string fileName = getFilenameFromModule(M); - Twine output = Twine(currentDir) + "/Output/" + fileName + ""; - return output.str().append(".kernels.ll");*/ - std::string mid = M.getModuleIdentifier(); - return mid.append(".kernels.ll"); -} - -void CGT_SPIR::fixValueAddrspace(Value* V, unsigned addrspace) { - assert(isa<PointerType>(V->getType()) - && "Value should be of Pointer Type!"); - PointerType* OldTy = cast<PointerType>(V->getType()); - PointerType* NewTy = PointerType::get(OldTy->getElementType(), addrspace); - V->mutateType(NewTy); - for(Value::user_iterator ui = V->user_begin(), ue = V->user_end(); ui != ue; ui++) { - // Change all uses producing pointer type in same address space to new - // addressspace. - if(PointerType* PTy = dyn_cast<PointerType>((*ui)->getType())) { - if(PTy->getAddressSpace() == OldTy->getAddressSpace()) { - fixValueAddrspace(*ui, addrspace); - } - } - } -} - -Function* CGT_SPIR::changeArgAddrspace(Function* F, std::vector<unsigned> &Args, unsigned addrspace) { - unsigned idx = 0; - std::vector<Type*> ArgTypes; - for(auto& arg: F->getArgumentList()) { - DEBUG(errs() << arg << "\n"); - unsigned argno = arg.getArgNo(); - if ((idx < Args.size()) && (argno == Args[idx])) { - fixValueAddrspace(&arg, addrspace); - idx++; - } - ArgTypes.push_back(arg.getType()); - } - FunctionType* newFT = FunctionType::get(F->getReturnType(), ArgTypes, false); - - //F->mutateType(PTy); - Function* newF = cloneFunction(F, newFT, false); - replaceNodeFunctionInIR(*F->getParent(), F, newF); - - DEBUG(errs() << *newF->getFunctionType() << "\n" <<*newF << "\n"); - return newF; -} - -/* Remove the specified argument from arguments at positions denoted in Args */ -void CGT_SPIR::removeAttributeAtArguments(Function* F, std::vector<unsigned> &Args, Attribute::AttrKind attrKind) { - DEBUG(errs() << "Removing nocapture attribute from shared memory arguments of function " << F->getName() << "\n"); - - unsigned cnt = 0, arg_no = 0; - for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); - ai != ae && arg_no < Args.size(); ++ai, ++cnt) { - - if (Args[arg_no] == cnt) { - AttributeSet AS = F->getAttributes(); - AttrBuilder AB(AS, ai->getArgNo()+1); - AB.removeAttribute(attrKind); - AttributeSet argAS = AttributeSet::get(F->getContext(), ai->getArgNo()+1, AB); - F->removeAttributes(1+ai->getArgNo(), AS.getParamAttributes(ai->getArgNo() + 1)); - F->addAttributes(1+ai->getArgNo(), argAS); - - arg_no++; - } - } -} - -/* Add metadata to module KernelM, for OpenCL kernels */ -void CGT_SPIR::addCLMetadata(Function *F) { - // TODO: There is additional metadata used by kernel files but we skip them as - // they are not mandatory. In future they might be useful to enable - // optimizations - - IRBuilder<> Builder(&*F->begin()); - - // Create node for "kernel_arg_type" - SmallVector<Metadata*,8> argTypeNames; - argTypeNames.push_back(MDString::get(KernelM->getContext(), "kernel_arg_type")); - - for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae; - ai++) { - argTypeNames.push_back(MDString::get(KernelM->getContext(), printType(ai->getType()))); - } - // All argument type names are in the vector. Create a metadata node - // "kernel_arg_type" - MDTuple* KernelArgTypes = MDNode::get(KernelM->getContext(), argTypeNames); - - // Create kernel metadata node containg the kernel function and the - // "kernel_arg_type" metadata node created above - SmallVector<Metadata*,8> KernelMD; - KernelMD.push_back(ValueAsMetadata::get(F)); - KernelMD.push_back(KernelArgTypes); - MDTuple *MDKernelNode = MDNode::get(KernelM->getContext(), KernelMD); - - // Create metadata node opencl.kernels. It points to the kernel metadata node - NamedMDNode *MDN_kernels = KernelM->getOrInsertNamedMetadata("opencl.kernels"); - MDN_kernels->addOperand(MDKernelNode); - - //KernelMD.push_back(MDString::get(KernelM->getContext(), "kernel")); - // TODO: Replace 1 with the number of the kernel. - // Add when support for multiple launces is added - //KernelMD.push_back(ConstantInt::get(Type::getInt32Ty(KernelM->getContext()),1)); - //MDNode *MDNvvmAnnotationsNode = MDNode::get(KernelM->getContext(), KernelMD); - //NamedMDNode *MDN_annotations = KernelM->getOrInsertNamedMetadata("nvvm.annotations"); - //MDN_annotations->addOperand(MDNvvmAnnotationsNode); - -} - -/* Function to remove all remaining declarations of llvm intrinsics, - * as they are not supported in SPIR. - */ -void CGT_SPIR::removeLLVMIntrinsics() { - - std::vector<Function*> fv = std::vector<Function*>(); - - for (Module::iterator mi = KernelM->begin(), me = KernelM->end(); (mi != me); ++mi) { - Function* F = &*mi; - if (F->isDeclaration() && F->getName().startswith("llvm.")) { - DEBUG(errs() << "Declaration: " << F->getName() << " with " << F->getNumUses() <<"uses.\n"); - assert(F->hasNUses(0) && "LLVM intrinsic function still in use"); - fv.push_back(F); - } - } - - for (std::vector<Function*>::iterator vi = fv.begin(); vi != fv.end(); ++vi) { - DEBUG(errs() << "Erasing declaration: " << (*vi)->getName() <<"\n"); - (*vi)->replaceAllUsesWith(UndefValue::get((*vi)->getType())); - (*vi)->eraseFromParent(); - } - -} - -void CGT_SPIR::writeKernelsModule() { - - // In addition to deleteing all otjer functions, we also want to spice it up a - // little bit. Do this now. - legacy::PassManager Passes; - - std::error_code EC; - tool_output_file Out(getKernelsModuleName(M).c_str(), EC, sys::fs::F_None); - if (EC) { - errs() << EC.message() << "\n"; - } - - Passes.add( - createPrintModulePass(Out.os())); - - Passes.run(*KernelM); - - // Declare success. - Out.keep(); -} - -Function* CGT_SPIR::transformFunctionToVoid(Function* F) { - - // FIXME: Maybe do that using the Node? - StructType* FRetTy = cast<StructType>(F->getReturnType()); - assert(FRetTy && "Return Type must always be a struct"); - - // Keeps return statements, because we will need to replace them - std::vector<ReturnInst *> RItoRemove; - findReturnInst(F, RItoRemove); - - - // Check for { } return struct, which means that the function returns void - if (FRetTy->isEmptyTy()) { - - DEBUG(errs() << "\tFunction output struct is void\n"); - DEBUG(errs() << "\tNo parameters added\n"); - - // Replacing return statements with others returning void - for (std::vector<ReturnInst *>::iterator i = RItoRemove.begin(), - e = RItoRemove.end(); i != e; ++i) { - ReturnInst::Create((F->getContext()), 0, (*i)); - (*i)->eraseFromParent(); - } - DEBUG(errs() << "\tChanged return statements to return void\n"); - } - else { - // The struct has return values, thus needs to be converted to parameter - - // Iterate over all element types of return struct and add arguments to the - // function - std::vector<Argument*> Args; - for (unsigned i=0; i<FRetTy->getNumElements(); i++) { - Argument* RetArg = new Argument(FRetTy->getElementType(i)->getPointerTo(), "ret_arg", F); - Args.push_back(RetArg); - DEBUG(errs() << "\tCreated parameter: " << *RetArg << "\n"); - } - - Function::arg_iterator ai, ae; - - DEBUG(errs() << "\tReplacing Return statements\n"); - // Replace return statements with extractValue and store instructions - for (std::vector<ReturnInst *>::iterator rii = RItoRemove.begin(), - rie = RItoRemove.end(); rii != rie; ++rii) { - ReturnInst* RI = (*rii); - Value* RetVal = RI->getReturnValue(); - for(unsigned i = 0; i < Args.size(); i++) { - ExtractValueInst* EI = ExtractValueInst::Create(RetVal, ArrayRef<unsigned>(i), - Args[i]->getName()+".val", RI); - new StoreInst(EI, Args[i], RI); - } - // assert(RetVal && "Return value should not be null at this point"); - // StructType* RetType = cast<StructType>(RetVal->getType()); - // assert(RetType && "Return type is not a struct"); - - ReturnInst::Create((F->getContext()), 0, RI); - RI->eraseFromParent(); - - } - } - DEBUG(errs() << "\tReplaced return statements\n"); - - // Create the argument type list with the added argument's type - std::vector<Type*> ArgTypes; - for(Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); - ai != ae; ++ai) { - ArgTypes.push_back(ai->getType()); - } - - // Adding new arguments to the function argument list, would not change the - // function type. We need to change the type of this function to reflect the - // added arguments - Type* VoidRetType = Type::getVoidTy(F->getContext()); - FunctionType* newFT = FunctionType::get(VoidRetType, ArgTypes, F->isVarArg()); - - // Change the function type - //F->mutateType(PTy); - Function* newF = cloneFunction(F, newFT, false); - replaceNodeFunctionInIR(*F->getParent(), F, newF); - - return newF; -} - -// Remove the visc in/out attributes from kernel function -void CGT_SPIR::removeInOutAttributes(Function* F) { - DEBUG(errs() << "Removing visc attributes from argument list of function " << F->getName() << "\n"); - for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); - ai != ae; ai++) { - - AttributeSet AS = F->getAttributes(); - AttrBuilder AB(AS, ai->getArgNo()+1); - AB.removeAttribute(Attribute::In); - AB.removeAttribute(Attribute::Out); - AB.removeAttribute(Attribute::InOut); - AttributeSet argAS = AttributeSet::get(F->getContext(), ai->getArgNo()+1, AB); - F->removeAttributes(1+ai->getArgNo(), AS.getParamAttributes(ai->getArgNo() + 1)); - F->addAttributes(1+ai->getArgNo(), argAS); - - } -} - -/****************************************************************************** - * Helper functions * - ******************************************************************************/ - -// Calculate execute node parameters which include, number of diemnsions for -// dynamic instances of the kernel, local and global work group sizes. -static void getExecuteNodeParams(Module &M, Value* &workDim, Value* &LocalWGPtr, Value* - &GlobalWGPtr, Kernel* kernel, ValueToValueMapTy& VMap, Instruction* IB) { - - // Assign number of dimenstions a constant value - workDim = ConstantInt::get(Type::getInt32Ty(M.getContext()), kernel->gridDim); - - // If local work group size if null - if(!kernel->hasLocalWG()) { - LocalWGPtr = Constant::getNullValue(Type::getInt64PtrTy(M.getContext())); - } - else { - for(unsigned i = 0; i < kernel->localWGSize.size(); i++) { - if(isa<Argument>(kernel->localWGSize[i])) - kernel->localWGSize[i] = VMap[kernel->localWGSize[i]]; - } - LocalWGPtr = genWorkGroupPtr(M, kernel->localWGSize, VMap, IB, "LocalWGSize"); - } - - for(unsigned i = 0; i < kernel->globalWGSize.size(); i++) { - if(isa<Argument>(kernel->globalWGSize[i])) - kernel->globalWGSize[i] = VMap[kernel->globalWGSize[i]]; - } - - // For OpenCL, global work group size is the total bumber of instances in each - // dimension. So, multiply local and global dim limits. - std::vector<Value*> globalWGSizeInsts; - if(kernel->hasLocalWG()) { - for (unsigned i = 0; i < kernel->gridDim; i++) { - BinaryOperator* MulInst = BinaryOperator::Create(Instruction::Mul, kernel->globalWGSize[i], kernel->localWGSize[i], "", IB); - globalWGSizeInsts.push_back(MulInst); - } - } - else { - globalWGSizeInsts = kernel->globalWGSize; - } - GlobalWGPtr = genWorkGroupPtr(M, globalWGSizeInsts, VMap, IB, "GlobalWGSize"); - DEBUG(errs() << "Pointer to global work group: " << *GlobalWGPtr << "\n"); -} - -// CodeGen for allocating space for Work Group on stack and returning a pointer -// to its address -static Value* genWorkGroupPtr(Module &M, std::vector<Value*> WGSize, ValueToValueMapTy& VMap, Instruction* IB, const Twine& WGName) { - Value* WGPtr; - // Get int64_t and or ease of use - Type* Int64Ty = Type::getInt64Ty(M.getContext()); - - // Work Group type is [#dim x i64] - Type* WGTy = ArrayType::get(Int64Ty, WGSize.size()); - // Allocate space of Global work group data on stack and get pointer to - // first element. - AllocaInst* WG = new AllocaInst(WGTy, WGName, IB); - WGPtr = BitCastInst::CreatePointerCast(WG, Int64Ty->getPointerTo(), WG->getName()+".0", IB); - Value* nextDim = WGPtr; - DEBUG(errs() << *WGPtr << "\n"); - - // Iterate over the number of dimensions and store the global work group - // size in that dimension - for(unsigned i=0; i < WGSize.size(); i++) { - assert(WGSize[i]->getType()->isIntegerTy() && "Dimension not an integer type!"); - - if(WGSize[i]->getType() != Int64Ty) { - // If number of dimensions are mentioned in any other integer format, - // generate code to extend it to i64. We need to use the mapped value in - // the new generated function, hence the use of VMap - // FIXME: Why are we changing the kernel WGSize vector here? - DEBUG(errs() << "Not i64. Zero extend required.\n"); - DEBUG(errs() << *WGSize[i] << "\n"); - CastInst* CI = BitCastInst::CreateIntegerCast(WGSize[i], Int64Ty, true, "", IB); - DEBUG(errs() << "Bitcast done.\n"); - StoreInst* SI = new StoreInst(CI, nextDim, IB); - DEBUG(errs() << "Zero extend done.\n"); - DEBUG(errs() << "\tZero extended work group size: " << *SI << "\n"); - } else { - // Store the value representing work group size in ith dimension on - // stack - StoreInst* SI = new StoreInst(WGSize[i], nextDim, IB); - - DEBUG(errs() << "\t Work group size: " << *SI << "\n"); - } - if(i+1 < WGSize.size()) { - // Move to next dimension - GetElementPtrInst* GEP = GetElementPtrInst::Create(nullptr, nextDim, - ArrayRef<Value*>(ConstantInt::get(Int64Ty, 1)), - WG->getName()+"."+Twine(i+1), - IB); - DEBUG(errs() << "\tPointer to next dimension on stack: " << *GEP << "\n"); - nextDim = GEP; - } - } - return WGPtr; - -} - -//Get generated SPIR binary name -static std::string getSPIRFilename(const Module& M) { - std::string mid = M.getModuleIdentifier(); - return mid.append(".kernels.bc"); - -} - -// Get the name of the input file from module ID -static std::string getFilenameFromModule(const Module& M) { - std::string moduleID = M.getModuleIdentifier(); - return moduleID.substr(moduleID.find_last_of("/")+1); -} - -// Changes the data layout of the Module to be compiled with SPIR backend -// TODO: Figure out when to call it, probably after duplicating the modules -static void changeDataLayout(Module &M) { - std::string spir64_layoutStr = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"; - - M.setDataLayout(StringRef(spir64_layoutStr)); - return; -} - -static void changeTargetTriple(Module &M) { - std::string spir64_TargetTriple = "spir64-unknown-unknown"; - M.setTargetTriple(StringRef(spir64_TargetTriple)); -} - -// Helper function, generate a string representation of a type -static std::string printType(Type* ty) { - std::string type_str; - raw_string_ostream rso(type_str); - ty->print(rso); - return rso.str(); -} - -// Helper function to get mangled names of OpenCL built ins -static StringRef getMangledName(std::string name) { - Twine mangledName = "_Z"+Twine(name.size())+name+"j"; - return StringRef(mangledName.str()); -} - - -// Helper function, populate a vector with all return statements in a function -static void findReturnInst(Function* F, std::vector<ReturnInst *> & ReturnInstVec) { - for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { - Instruction *I = &(*i); - ReturnInst* RI = dyn_cast<ReturnInst>(I); - if (RI) { - ReturnInstVec.push_back(RI); - } - } -} - -// Helper function, populate a vector with all IntrinsicID intrinsics in a function -static void findIntrinsicInst(Function* F, Intrinsic::ID IntrinsicID, std::vector<IntrinsicInst *> & IntrinsicInstVec) { - for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { - Instruction *I = &(*i); - IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); - if (II && II->getIntrinsicID() == IntrinsicID) { - IntrinsicInstVec.push_back(II); - } - } -} - -// Helper function to get mangled names of OpenCL built ins for atomics -static StringRef getAtomicMangledName(std::string name, unsigned addrspace, bool sign) { - Twine mangledName = "_Z" + - Twine(name.size())+name + - "PU3AS"+Twine(addrspace) + "jj"; -// ((sign) ? "ii" : "jj"); - return StringRef(mangledName.str()); -} - -// Helper funtion, returns the OpenCL function name corresponding to atomic op -static StringRef getAtomicOpName(Intrinsic::ID ID, unsigned addrspace) { - switch(ID) { - case Intrinsic::visc_atomic_cmpxchg: - return getAtomicMangledName("atom_cmpxchg", addrspace, true); - case Intrinsic::visc_atomic_add: - return getAtomicMangledName("atom_add", addrspace, true); - case Intrinsic::visc_atomic_sub: - return getAtomicMangledName("atom_sub", addrspace, true); - case Intrinsic::visc_atomic_min: - return getAtomicMangledName("atom_min", addrspace, true); - case Intrinsic::visc_atomic_umin: - return getAtomicMangledName("atom_min", addrspace, false); - case Intrinsic::visc_atomic_max: - return getAtomicMangledName("atom_max", addrspace, true); - case Intrinsic::visc_atomic_umax: - return getAtomicMangledName("atom_max", addrspace, false); - case Intrinsic::visc_atomic_inc: - return getAtomicMangledName("atom_inc", addrspace, true); - case Intrinsic::visc_atomic_dec: - return getAtomicMangledName("atom_dec", addrspace, true); - case Intrinsic::visc_atomic_xchg: - return getAtomicMangledName("atom_xchg", addrspace, true); - case Intrinsic::visc_atomic_and: - return getAtomicMangledName("atom_and", addrspace, true); - case Intrinsic::visc_atomic_or: - return getAtomicMangledName("atom_or", addrspace, true); - case Intrinsic::visc_atomic_xor: - return getAtomicMangledName("atom_xor", addrspace, true); - default: - llvm_unreachable("Unsupported atomic intrinsic!"); - }; -} - -static std::string getMathFunctionName(Intrinsic::ID ID) { - switch(ID) { - case Intrinsic::sin: return "_Z3sinf"; - case Intrinsic::cos: return "_Z3cosf"; - case Intrinsic::sqrt: return "_Z4sqrtf"; - case Intrinsic::floor: return "_Z5floorf"; - case Intrinsic::nvvm_rsqrt_approx_f: return "_Z5rsqrtf"; - default: - llvm_unreachable("Unsupported math function!"); - }; -} - -} // End of namespace - -char DFG2LLVM_SPIR::ID = 0; -static RegisterPass<DFG2LLVM_SPIR> X("dfg2llvm-spir", - "Dataflow Graph to LLVM for SPIR Pass", - false /* does not modify the CFG */, - true /* transformation, * - * not just analysis */); - diff --git a/lib/DFG2LLVM_SPIR/DFG2LLVM_SPIR.exports b/lib/DFG2LLVM_SPIR/DFG2LLVM_SPIR.exports deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/lib/DFG2LLVM_SPIR/LLVMBuild.txt b/lib/DFG2LLVM_SPIR/LLVMBuild.txt deleted file mode 100644 index 72c4de9efdc816ca74d54d96f4f66afd467d1639..0000000000000000000000000000000000000000 --- a/lib/DFG2LLVM_SPIR/LLVMBuild.txt +++ /dev/null @@ -1,21 +0,0 @@ -;===- ./lib/Transforms/DFG2LLVM_SPIR/LLVMBuild.txt -------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = DFG2LLVM_SPIR -parent = Transforms diff --git a/lib/InsertApproxInfo/CMakeLists.txt b/lib/InsertApproxInfo/CMakeLists.txt deleted file mode 100644 index 2b6d41bd709ce1e57bd081ffb9542d157bad36e4..0000000000000000000000000000000000000000 --- a/lib/InsertApproxInfo/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -if(WIN32 OR CYGWIN) - set(LLVM_LINK_COMPONENTS Core Support) -endif() - -add_llvm_loadable_module( InsertApproxInfo - InsertApproxInfo.cpp - - DEPENDS - intrinsics_gen - PLUGIN_TOOL - opt - ) diff --git a/lib/InsertApproxInfo/InsertApproxInfo.cpp b/lib/InsertApproxInfo/InsertApproxInfo.cpp deleted file mode 100644 index bde4ef89073fbf1df5bcafc3907979f8d15b704f..0000000000000000000000000000000000000000 --- a/lib/InsertApproxInfo/InsertApproxInfo.cpp +++ /dev/null @@ -1,498 +0,0 @@ -//===------------------------ InPlaceDFGAnalysis.cpp ----------------------===// -// -// -// -// The LLVM Compiler Infrastructure -// -// -// -// This file is distributed under the University of Illinois Open Source -// -// License. See LICENSE.TXT for details. -// -// -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "InsertApproxInfo" - -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h" -#include "llvm/SupportVISC/DFG2LLVM.h" -#include "llvm/IR/InstrTypes.h" -#include <unordered_map> -#include <dirent.h> -#include <stdio.h> -#include <sstream> -#include <fstream> - - -using namespace llvm; -using namespace builddfg; -using namespace dfg2llvm; -using namespace inplacedfg; - - -namespace { - -static cl::opt<std::string> dir_name("results-dir", cl::desc(" Name of directory with Autotuner results ")); - - -struct ApproxMetrics{ - std::string op_name; - std::string category; - unsigned int rank; // rank given by autotuner - double approx_level; - // Relative L-norm metrics - double relative_l1; - double relative_l2; - double relative_linf; - // Mean L-norm metrics - double mean_l1; - double mean_l2; - double mean_linf; -}; - - - -struct InsertApproxInfoWrapperPass : public ModulePass { - static char ID; // Pass identification, replacement for typeid - InsertApproxInfoWrapperPass() : ModulePass(ID) {} - -public: - // Functions - bool runOnModule(Module &M); - void getAnalysisUsage(AnalysisUsage &AU) const; -}; - - -// Visitor for Code generation traversal (tree traversal for now) -class InsertApproxInfo : public CodeGenTraversal { - -private: - // Virtual Functions - void init() {} - void initRuntimeAPI() {} - void codeGen(DFInternalNode* N); - void codeGen(DFLeafNode* N); - void loadTrainedApproxMetrics(std::string dir_path); - void loadMetricsFromFile(std::string dir_path, std::string file_path, std::string category); - void loadMetricsFromDir(std::string dir_path, std::string category); - void readApproxValues(const std::string line, ApproxMetrics* approx_metrics); - void initIntrinsicNames(); - void initGlobalStrings(); - - // private data - std::unordered_map<std::string, std::string> intrinsics_map; - std::unordered_map<std::string, std::vector<ApproxMetrics*>> operation_metrics; - GlobalVariable* rank_str; - GlobalVariable* category_str; - GlobalVariable* mean_l1_str; - GlobalVariable* mean_l2_str; - GlobalVariable* mean_linf_str; - GlobalVariable* rel_l1_str; - GlobalVariable* rel_l2_str; - GlobalVariable* rel_linf_str; - - - // Tracks the id of the tensor op processed - unsigned int currentID; - -public: - // Constructor - InsertApproxInfo(Module &_M, BuildDFG &_DFG); - - //void run(Module &M, BuildDFG &DFG); - void run(std::string dir_path); - -}; - - - -void InsertApproxInfo::initIntrinsicNames(){ - - intrinsics_map["llvm.visc.tensor.convolution"] = "tensorConv"; - intrinsics_map["llvm.visc.tensor.mul"] = "tensorGemm"; - intrinsics_map["llvm.visc.tensor.add"] = "tensorAdd"; - intrinsics_map["llvm.visc.tensor.pool.max"] = "tensorPooling"; - intrinsics_map["llvm.visc.tensor.tanh"] = "tensorTanh"; -} - - -void InsertApproxInfo::initGlobalStrings(){ - - /**** Creating global constant strings for each approximation metric type *******/ - - std::string rank_string = "rank"; - Constant* stringConst = ConstantDataArray::getString(M.getContext(), StringRef(rank_string.c_str()), true); - rank_str = new GlobalVariable(M, stringConst->getType(), true, - GlobalValue::ExternalLinkage, stringConst, ""); - - std::string category_string = "category"; - stringConst = ConstantDataArray::getString(M.getContext(), StringRef(category_string.c_str()), true); - category_str = new GlobalVariable(M, stringConst->getType(), true, - GlobalValue::ExternalLinkage, stringConst, ""); - - // Mean l-norm metrics - std::string metric_string = "mean_l1"; - stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true); - mean_l1_str = new GlobalVariable(M, stringConst->getType(), true, - GlobalValue::ExternalLinkage, stringConst, ""); - - metric_string = "mean_l2"; - stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true); - mean_l2_str = new GlobalVariable(M, stringConst->getType(), true, - GlobalValue::ExternalLinkage, stringConst, ""); - - metric_string = "mean_linf"; - stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true); - mean_linf_str = new GlobalVariable(M, stringConst->getType(), true, - GlobalValue::ExternalLinkage, stringConst, ""); - - // Relative l-norm metrics - metric_string = "rel_l1"; - stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true); - rel_l1_str = new GlobalVariable(M, stringConst->getType(), true, - GlobalValue::ExternalLinkage, stringConst, ""); - - metric_string = "rel_l2"; - stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true); - rel_l2_str = new GlobalVariable(M, stringConst->getType(), true, - GlobalValue::ExternalLinkage, stringConst, ""); - - metric_string = "rel_linf"; - stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true); - rel_linf_str = new GlobalVariable(M, stringConst->getType(), true, - GlobalValue::ExternalLinkage, stringConst, ""); - -} - - -InsertApproxInfo::InsertApproxInfo(Module &_M, BuildDFG &_DFG) : - CodeGenTraversal(_M, _DFG){ - - currentID = 1; - - initIntrinsicNames(); - initGlobalStrings(); -} - - -void InsertApproxInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<BuildDFG>(); - AU.addPreserved<BuildDFG>(); -} - - -bool InsertApproxInfoWrapperPass::runOnModule(Module &M) { - - std::string dir_path = dir_name.getValue(); - // Get the BuildDFG Analysis Results: - // - Dataflow graph - BuildDFG &DFG = getAnalysis<BuildDFG>(); - - InsertApproxInfo IApprox(M, DFG); - IApprox.run(dir_path); - - return false; -} - - -void InsertApproxInfo::readApproxValues(const std::string line, ApproxMetrics* approx_metrics){ - - std::istringstream in(line); - std::string op_name; - - float approx_level; - - float mean_l1; - float mean_l2; - float mean_linf; - - float relative_l1; - float relative_l2; - float relative_linf; - - in >> op_name; - in >> approx_level; - - in >> mean_l1; - in >> mean_l2; - in >> mean_linf; - - in >> relative_l1; - in >> relative_l2; - in >> relative_linf; - - printf("\n *** op_name = %s \n", op_name.c_str()); - printf("approx_level = %f \n", approx_level); - printf("relative_l1 = %f \n", relative_l1); - printf("relative_l2 = %f \n", relative_l2); - printf("relative_linf = %f \n", relative_linf); - printf("mean_l1 = %f \n", mean_l1); - printf("mean_l2 = %f \n", mean_l2); - printf("mean_linf = %f \n", mean_linf); - - approx_metrics->op_name = op_name; - approx_metrics->approx_level = approx_level; - approx_metrics->mean_l1 = mean_l1; - approx_metrics->mean_l2 = mean_l2; - approx_metrics->mean_linf = mean_linf; - approx_metrics->relative_l1 = relative_l1; - approx_metrics->relative_l2 = relative_l2; - approx_metrics->relative_linf = relative_linf; - -} - - -unsigned int getFileRank(std::string file_path){ - - char file_name[100]; // Assuming no file names greater than 100 chars - strcpy(file_name, file_path.c_str()); - - char* pch = strtok(file_name, "_"); - char* last_pch; - while(pch != NULL){ - last_pch = pch; - pch = strtok(NULL, "_"); - } - - printf("NOTE: ****** last_pch = %s \n", last_pch); - - size_t sz; - int rank = std::stoi(last_pch, &sz); - - return rank + 1; // NOTE: Adding 1 to start ranks with '1' -} - - - -void InsertApproxInfo::loadMetricsFromFile(std::string dir_path, std::string file_path, std::string category){ - - std::string full_path = dir_path + "/" + file_path; - printf("full_path = %s \n", full_path.c_str()); - std::ifstream infile(full_path.c_str()); - std::string line; - - unsigned int it_count = 0; - while(std::getline(infile, line)){ - - // Skip first line with confidence information - if(it_count > 0){ - std::vector<float> approx_values; - ApproxMetrics* approx_metrics = new ApproxMetrics; - readApproxValues(line, approx_metrics); - - approx_metrics->category = category; - unsigned int rank = getFileRank(file_path); - approx_metrics->rank = rank; - - std::string unique_op_name = approx_metrics->op_name + std::to_string(it_count); - operation_metrics[unique_op_name].push_back(approx_metrics); - printf("\n ** unique_op_name = %s \n", unique_op_name.c_str()); - } - - it_count++; - } - -} - - - -void InsertApproxInfo::loadMetricsFromDir(std::string dir_path, std::string category){ - - struct dirent* entry; - dir_path = dir_path + category; - - DIR* dir = opendir(dir_path.c_str()); - if(dir == NULL){ - printf("Directory %s not found . Aborting ... \n\n ", dir_path.c_str()); - abort(); - } - - while((entry = readdir(dir)) != NULL){ - printf("f_name = %s \n", entry->d_name); - std::string f_name = entry->d_name; - loadMetricsFromFile(dir_path, f_name, category); - } -} - - - -void InsertApproxInfo::loadTrainedApproxMetrics(std::string dir_path){ - - std::string root_path = dir_path + "/high_confidence/"; - loadMetricsFromDir(root_path, "linear"); - loadMetricsFromDir(root_path, "log"); - loadMetricsFromDir(root_path, "quad"); -} - - -/*** Methods of InPlaceDFGAnalysis ***/ -void InsertApproxInfo::run(std::string dir_path) { - - loadTrainedApproxMetrics(dir_path); - - errs() << "\n NOTE: ApproxInfo INSERTION TRANSFORM \n"; - std::vector<DFInternalNode*> Roots = DFG.getRoots(); - - // Iterate over all the DFGs - // Analyse the edges for parameters that are valid to be used in place - for (auto rootNode: Roots) { - //ATVisitor->visit(rootNode); - this->visit(rootNode); - } - - //delete ATVisitor; - return; -} - -/*** Analysis of internal node ***/ -void InsertApproxInfo::codeGen(DFInternalNode* N) { - DEBUG(errs() << "Analysing Node: " << N->getFuncPointer()->getName() << "\n"); -} - -/*** Analysis of leaf node ***/ -void InsertApproxInfo::codeGen(DFLeafNode* N) { - DEBUG(errs() << "Analysing Node: " << N->getFuncPointer()->getName() << "\n"); - - // Skip code generation if it is a dummy node - if(N->isDummyNode()) { - DEBUG(errs() << "Skipping dummy node\n"); - return; - } - - // Abort code generation if it is an allocation node - if(N->isAllocationNode()) { - assert(false && "Allocation Node not expected in ApproxHPVM"); - return; - } - - Function *F = N->getFuncPointer(); - Module* M = F->getParent(); - std::vector<IntrinsicInst *> IItoRemove; - - - /**** Adding operand bundles for each tensor operation in the HPVM DFG Leaf Node ****/ - for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { - Instruction *I = &(*i); - errs()<<*I<<"\n"; - - - if (BuildDFG::isViscIntrinsic(I)) { - IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); - assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor") - && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); - - std::string intrinsic_id = std::string(II->getCalledFunction()->getName().data()); - std::string runtime_func_name = intrinsics_map[intrinsic_id]; - std::string unique_name = runtime_func_name + std::to_string(currentID); - printf("\n ---- unique_name = %s \n ", unique_name.c_str()); - std::vector<ApproxMetrics*> approx_metrics; - if(operation_metrics.find(unique_name) != operation_metrics.end()){ - approx_metrics = operation_metrics[unique_name]; - } - else{ - errs()<<"Intrinsic Name NOT found in the map - Unexpected Error. Aborting ... \n\n"; - abort(); - } - - - unsigned int num_configs = approx_metrics.size(); - std::vector<OperandBundleDef> conf_bundles; - for(unsigned int i = 0; i < num_configs; i++){ - std::vector<Value*> norm_vals; - - norm_vals.push_back(category_str); - Constant* categoryConst = ConstantDataArray::getString(M->getContext(), StringRef(approx_metrics[i]->category.c_str()), true); - GlobalVariable* category_value = new GlobalVariable(*M, categoryConst->getType(), true, - GlobalValue::ExternalLinkage, categoryConst, ""); - norm_vals.push_back(category_value); - - norm_vals.push_back(rank_str); - Constant* constIntVal = ConstantInt::get(Type::getInt32Ty(M->getContext()), approx_metrics[i]->rank); - norm_vals.push_back(constIntVal); - - // Adding mean l-norm metrics - norm_vals.push_back(mean_l1_str); - Constant* constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->mean_l1); - norm_vals.push_back(constFPVal); - - norm_vals.push_back(mean_l2_str); - constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->mean_l2); - norm_vals.push_back(constFPVal); - - norm_vals.push_back(mean_linf_str); - constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->mean_linf); - norm_vals.push_back(constFPVal); - - // Relative l-norm Metrics - norm_vals.push_back(rel_l1_str); - constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->relative_l1); - norm_vals.push_back(constFPVal); - - norm_vals.push_back(rel_l2_str); - constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->relative_l2); - norm_vals.push_back(constFPVal); - - norm_vals.push_back(rel_linf_str); - constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->relative_linf); - norm_vals.push_back(constFPVal); - - - std::string config_name = "config_" + std::to_string(i+1); - OperandBundleDef norm_bundle(config_name, norm_vals); - - conf_bundles.push_back(norm_bundle); - } - - ArrayRef<OperandBundleDef> bundle_arr(conf_bundles); - - /*** Creating new Intrinsic call with Operand Bundles attached **/ - Function* calledFunction = II->getCalledFunction(); - unsigned num_args = II->getNumArgOperands(); - std::vector<Value*> args; - for(unsigned i = 0; i < num_args; i++){ - Value* argValue = II->getArgOperand(i); - args.push_back(argValue); - } - - CallInst* CI = CallInst::Create(calledFunction, - args, bundle_arr, "", II); - - errs()<<"NOTE: New CallInst = "<<*CI<<"\n"; - - II->replaceAllUsesWith(CI); - // Mark to remove at the end - IItoRemove.push_back(II); - - // Increment counter of op processed - currentID++; - } - } - - - for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(), - re = IItoRemove.rend(); ri != re; ++ri) { - DEBUG(errs() << "Erasing: " << **ri << "\n"); - errs() << "Erasing: " << **ri << "\n"; - (*ri)->eraseFromParent(); - } - - -} - -char InsertApproxInfoWrapperPass::ID = 0; -static RegisterPass<InsertApproxInfoWrapperPass> X("insert-approxinfo", - "Pass to add approximation information (l-norm metrics) in the ApproxHPVM DFG", - false /* does not modify the CFG */, - false /* not transformation, just analysis */); - - - - - -} // End of namespace - diff --git a/lib/InsertApproxInfo/LLVMBuild.txt b/lib/InsertApproxInfo/LLVMBuild.txt deleted file mode 100644 index e9cf5afd4a307c1bd985238929ef06d89215f3ab..0000000000000000000000000000000000000000 --- a/lib/InsertApproxInfo/LLVMBuild.txt +++ /dev/null @@ -1,21 +0,0 @@ -;===- ./lib/Transforms/LocalMem/LLVMBuild.txt ------------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = InsertApproxInfo -parent = Transforms diff --git a/lib/MergeDFN/CMakeLists.txt b/lib/MergeDFN/CMakeLists.txt deleted file mode 100644 index 30e7330d0ccbeb2e508dea79ac22b50239d59f51..0000000000000000000000000000000000000000 --- a/lib/MergeDFN/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -if(WIN32 OR CYGWIN) - set(LLVM_LINK_COMPONENTS Core Support) -endif() - -add_llvm_loadable_module( LLVMMergeDFN - MergeDFN.cpp - - DEPENDS - intrinsics_gen - PLUGIN_TOOL - opt - ) diff --git a/lib/MergeDFN/LLVMBuild.txt b/lib/MergeDFN/LLVMBuild.txt deleted file mode 100644 index 099486e6c3196d4a20ac851a0a3563fde4a5d05d..0000000000000000000000000000000000000000 --- a/lib/MergeDFN/LLVMBuild.txt +++ /dev/null @@ -1,21 +0,0 @@ -;===- ./lib/Transforms/MergeDFN/LLVMBuild.txt ------------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = MergeDFN -parent = Transforms diff --git a/lib/MergeDFN/MergeDFN.cpp b/lib/MergeDFN/MergeDFN.cpp deleted file mode 100644 index 35e70e35cef854c21cbcc65bb84a4bddc26727a2..0000000000000000000000000000000000000000 --- a/lib/MergeDFN/MergeDFN.cpp +++ /dev/null @@ -1,2338 +0,0 @@ -//=== DFG2LLVM_NVPTX.cpp ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define ENABLE_ASSERTS - -#define DEBUG_TYPE "MergeDFN" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/IRReader/IRReader.h" -#include "llvm/Linker/Linker.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/IR/Attributes.h" -#include "llvm/SupportVISC/VISCTimer.h" -#include "llvm/SupportVISC/DFG2LLVM.h" - -#include <sstream> - -using namespace llvm; -using namespace builddfg; -using namespace dfg2llvm; - -static cl::opt<std::string> Node1Name("mc1", - cl::init(""), - cl::Hidden, - cl::desc("First node candidate for merge")); -static cl::opt<std::string> Node2Name("mc2", - cl::init(""), - cl::Hidden, - cl::desc("Second node candidate for merge")); - -namespace { -// Helper class declarations - -// Helper function declarations - -// MergeDFN -struct MergeDFN : public ModulePass { - static char ID; // Pass identification, replacement for typeid - MergeDFN() : ModulePass(ID) {} - -private: - // Member variables - - // Functions - -public: - // Functions - bool runOnModule(Module &M); - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<BuildDFG>(); - AU.addPreserved<BuildDFG>(); //TODO: Check - } - -}; - -// Visitor for Code generation traversal (tree traversal for now) -class MergeTraversal : public DFNodeVisitor { - -private: - //Member variables - Module &M; - BuildDFG &DFG; - DFNode *n1; - DFNode *n2; - DFNode *m; - - //Functions - void testNodeName(DFNode* N); - -public: - // Constructor - MergeTraversal(Module &_M, BuildDFG &_DFG) : M(_M), DFG(_DFG) { - n1 = NULL; - n2 = NULL; - m = NULL; - } - - virtual void visit(DFInternalNode* N) { - // Follows a bottom-up approach to find the nodes. - for(DFGraph::children_iterator i = N->getChildGraph()->begin(), - e = N->getChildGraph()->end(); i != e; ++i) { - DFNode* child = *i; - child->applyDFNodeVisitor(*this); - } - - DEBUG(errs() << "Testing Node (I) - " << N->getFuncPointer()->getName() << "\n"); - testNodeName(N); - DEBUG(errs() << "\tDONE - " << "\n"); - - } - - virtual void visit(DFLeafNode* N) { - DEBUG(errs() << "Testing Node (L) - " << N->getFuncPointer()->getName() << "\n"); - testNodeName(N); - DEBUG(errs() << "DONE" << "\n"); - } - - bool isValidMergeChoise(); - - void mergeDFN(); - -}; - -//===--------------------- Helper Function Declarations --------------===// -IntrinsicInst* createIdenticalCreateNodeWithDifferentFunction(Function* F, - IntrinsicInst* II); -IntrinsicInst* createNewCreateNodeBasedOn(Function* F, IntrinsicInst* II, - Function* Fargs); -IntrinsicInst* createIdenticalCreateEdgeWithDifferentPort(IntrinsicInst* II, -unsigned port, bool srcport); -IntrinsicInst* createIdenticalCreateEdgeWithDifferentNode(IntrinsicInst* II, -IntrinsicInst* IInode, bool srcnode); -IntrinsicInst* createIdenticalBindInputWithDifferentNode(IntrinsicInst* II, - IntrinsicInst* IInode); -IntrinsicInst* createIdenticalBindInputWithDifferentPort(IntrinsicInst* II, - unsigned port, - bool srcport); -IntrinsicInst* createIdenticalBindOutputWithDifferentNode(IntrinsicInst* II, - IntrinsicInst* IInode); -IntrinsicInst* createIdenticalBindOutputWithDifferentPort(IntrinsicInst* II, - unsigned port, - bool srcport); -void updateUsesOfCreateNodeInParent(IntrinsicInst* II1, - IntrinsicInst* II2, - IntrinsicInst* IInew, - std::map<unsigned, unsigned> InMap, - std::map<unsigned, unsigned> OutMap, - std::vector<DFEdge*> &DFEdgestoRemove, - BuildDFG &DFG); -bool isIncomingEdgeIntrinsic(IntrinsicInst* IIe, IntrinsicInst* IIn); -bool isOutgoingEdgeIntrinsic(IntrinsicInst* IIe, IntrinsicInst* IIn); -bool hasSuccessor(DFNode* N1, DFNode* N2); -bool hasImmediateSuccesssor(DFNode* N1, DFNode* N2); -bool checkEdgesType(DFNode* N1, DFNode* N2); -static void createArgTypes(DFNode* N1, DFNode* N2, std::vector<Type*> &ArgTypes); -void getChildNodeSplit(DFInternalNode* N, - std::vector<DFNode*> &AllocationNodes, - std::vector<DFNode*> &ComputeNodes); -void buildInputAndOutputMaps(DFNode* N1, DFNode* N2, - std::map<unsigned, unsigned> &N1InMap, - std::map<unsigned, unsigned> &N1OutMap, - std::map<unsigned, unsigned> &N2InMap, - std::map<unsigned, unsigned> &N2OutMap); -void buildInAndOutEdgeMaps(DFNode* N1, DFNode* N2, - std::map<unsigned, unsigned> &N1InMap, - std::map<unsigned, unsigned> &N1OutMap, - std::map<unsigned, unsigned> &N2InMap, - std::map<unsigned, unsigned> &N2OutMap); -static StructType* createReturnType(DFNode* N1, DFNode* N2); -static void copyAttrList(DFNode* N1, DFNode* N2, Function* F); -static void copyArgumentNames(DFNode* N1, DFNode* N2, Function* F); -void createShiftMap(Function* F, unsigned fromPos, unsigned num, - unsigned shift, std::vector<unsigned> &ShiftMap); -void shiftArgs(Function* F, unsigned fromPos, unsigned num, - unsigned shift, std::vector<unsigned> &ShiftMap); -static Function* createEmptyDFNodeFunction(DFNode* N1, DFNode* N2, Module &M); -static Function* createLeafDFNodeFunction(DFNode* N1, DFNode* N2, Module &M, - unsigned numOfN1AllocArgs, - unsigned posOfN1AllocArgs, - unsigned numOfN2AllocArgs); -static Function* createInternalDFNodeFunction(DFNode* N1, DFNode* N1an, - DFNode* N1cn, DFNode* N2, - DFNode* N2an, DFNode* N2cn, - Function* Fa, Function* Fc, - Module &M, - unsigned numOfN1AllocArgs, - unsigned posOfN1AllocArgs, - unsigned numOfN2AllocArgs); -void createNewInternalNodeIntrinsics(DFNode* N1, - DFNode* N2, - DFNode* N1a, - DFNode* N1c, - DFNode* N2a, - DFNode* N2c, - IntrinsicInst* IInewa, - IntrinsicInst* IInewc, - Function* Fa, //FIXME: Unused - Function* Fc, - std::vector<IntrinsicInst*> &IntrinsicInstructionsToAdd, - std::vector<IntrinsicInst*> &IntermediateInstructions); -Argument* getFunctionArgumentAt(Function* F, unsigned i); -void removeUnnecessaryInputEdges(DFNode* N, DFNode* N1, - unsigned numOfN1AllocArgs, - unsigned numOfN2AllocArgs); -void deleteInternalNodeFunction(DFNode* N, BuildDFG &DFG); -static visc::Target getPreferredTarget(Function* F); -static void addHint(Function* F, visc::Target T); -static void removeHint(Function* F, visc::Target T); -std::string getTestModuleName(Module &M); - - -//===--------------------- MergeDFN Outlined Functions --------------===// -void MergeTraversal::testNodeName(DFNode* N) { - - if (N->getFuncPointer()->getName() == Node1Name) { - //if (N->getFuncPointer()->getName() == "WrapperDilate_cloned") { - //if (N->getFuncPointer()->getName() == "WrapperDilate_cloned_WrapperErode_cloned") { - //if (N->getFuncPointer()->getName() == "WrapperHorizontal_cloned") { - //if (N->getFuncPointer()->getName() == "WrapperHorizontal_cloned_WrapperVertical_cloned") { - n1 = N; - } - else if (N->getFuncPointer()->getName() == Node2Name) { - //else if (N->getFuncPointer()->getName() == "WrapperErode_cloned") { - //else if (N->getFuncPointer()->getName() == "WrapperLincomb_cloned") { - //else if (N->getFuncPointer()->getName() == "WrapperVertical_cloned") { - //else if (N->getFuncPointer()->getName() == "WrapperSquareRoot_cloned") { - n2 = N; - } -} - -//TODO: use the topological sort to find merge candidates -bool MergeTraversal::isValidMergeChoise() { - if (!n1 || !n2) - return false; - - // Check that n1 and n2 have the same - // - parent - // - hint - // - number and size of dimensions of dynamic instances - bool valid = (n1->getParent() == n2->getParent()) && - (getPreferredTarget(n1->getFuncPointer()) == - getPreferredTarget(n2->getFuncPointer())) && - (n1->getNumOfDim() == n2->getNumOfDim()); - - std::vector<Value*> n1dim = n1->getDimLimits(); - std::vector<Value*> n2dim = n2->getDimLimits(); - for (unsigned i = 0; (i < n1dim.size()) && valid ; i++) - valid = valid && (n1dim[i] == n2dim[i]); - - // n1 should not be a successor of n2 - valid = valid && !hasSuccessor(n2, n1); - // n2 should not be a successor of n1, other than an immediate successor - valid = valid && (!hasSuccessor(n1, n2) || hasImmediateSuccesssor(n1, n2)); - - if (!valid) - return false; - - // Now, check specifically for one or two level cases - if (dyn_cast<DFLeafNode>(n1) && dyn_cast<DFLeafNode>(n1)) { - // For now, only allow one to one edges between them - return checkEdgesType(n1, n2); - } - - //At this point, at least one of them is internal node - - DFInternalNode* n1cast = dyn_cast<DFInternalNode>(n1); - DFInternalNode* n2cast = dyn_cast<DFInternalNode>(n2); - - // If not both of them are internal nodes, it is not a valid merging - if (!n1cast || !n2cast) - return false; - - // At this point, they are both internal nodes - // For internal nodes, we only allow one-to-one edges - valid = valid && checkEdgesType(n1->getParent(), n2->getParent()); // FIXME: n1 and n2? - - // We need to check that they have the appropriate internal structure - std::vector<DFNode*> AllocNodes1, ComputeNodes1, AllocNodes2, ComputeNodes2; - getChildNodeSplit(n1cast, AllocNodes1, ComputeNodes1); - getChildNodeSplit(n2cast, AllocNodes2, ComputeNodes2); - - // There must be at most a single allocation node within each one of them - // There must be exactly one compute node within each one of them - valid = valid && - (AllocNodes1.size() <= 1) && - (AllocNodes2.size() <= 1) && - (ComputeNodes1.size() == 1) && - (ComputeNodes2.size() == 1); - - // The compute nodes must be leaf nodes with the same number and size of - // dimensions of dynamic instances - DFLeafNode* n1cn = dyn_cast<DFLeafNode>(ComputeNodes1[0]); - DFLeafNode* n2cn = dyn_cast<DFLeafNode>(ComputeNodes2[0]); - if (!n1cn || !n2cn) - return false; - - errs() << "Checking if the sizes are same for internal nodes\n"; - - valid = valid && (n1cn->getNumOfDim() == n2cn->getNumOfDim()); - std::vector<Value*> n1cndim = n1cn->getDimLimits(); - std::vector<Value*> n2cndim = n2cn->getDimLimits(); - - for (unsigned i = 0; (i < n1cndim.size()) && valid ; i++) { - // These cannot fail, these valaues have been passed as arguments - Argument* n1arg = cast<Argument>(n1cndim[i]); - Argument* n2arg = cast<Argument>(n2cndim[i]); - unsigned n1argPos = n1arg->getArgNo(); - unsigned n2argPos = n2arg->getArgNo(); - // These values are coming from bind intrinsics, thus from the parent node - // The position of the argument is the same as the inPort of the incoming - // edge of their parent, n1 and n2. - DFEdge* n1argEdge = n1->getInDFEdgeAt(n1argPos); - DFEdge* n2argEdge = n2->getInDFEdgeAt(n2argPos); - // Get source position and node of these edges - unsigned n1SrcPos = n1argEdge->getSourcePosition(); - DFNode* n1SrcNode = n1argEdge->getSourceDF(); - unsigned n2SrcPos = n2argEdge->getSourcePosition(); - DFNode* n2SrcNode = n2argEdge->getSourceDF(); - valid = valid && (n1SrcPos == n2SrcPos) && (n1SrcNode == n2SrcNode); - } - - // We must also make sure that any edge that is incoming to the allocation - // node of n2 is not from n1 - if (AllocNodes2.size() == 1) { - DFNode* n2an = AllocNodes2[0]; - unsigned inPort = 0; - for (DFNode::const_indfedge_iterator ei = n2an->indfedge_begin(), - ee = n2an->indfedge_end(); (ei != ee) && valid ; ei++, inPort++) - if (n2an->getExtendedInDFEdgeAt(inPort)->getSourceDF() == ComputeNodes1[0]) - return false; - } - - return valid; -} - -void MergeTraversal::mergeDFN() { - - Function* Fm; - - if (dyn_cast<DFLeafNode>(n1)) { // One level node merging, - // n1 and n2 are leaf nodes - // Simply create the merged leaf function (with the calls) - Fm = createLeafDFNodeFunction(n1, n2, M, 0, 0, 0); - addHint(Fm, getPreferredTarget(n1->getFuncPointer())); - removeHint(n1->getFuncPointer(), getPreferredTarget(n1->getFuncPointer())); - removeHint(n2->getFuncPointer(), getPreferredTarget(n2->getFuncPointer())); - } else { // Two level node merging, n1 and n2 are internal nodes - // Correct form of internal nodes has been verified in isValidMerge - // Both n1 and n2 have at most two children: - // a compute node and maybe an allocation node - std::vector<DFNode*> AllocationNodes; - std::vector<DFNode*> ComputeNodes; - - getChildNodeSplit(cast<DFInternalNode>(n1), AllocationNodes, ComputeNodes); - DFLeafNode* N1ComputeNode = cast<DFLeafNode>(ComputeNodes[0]); - DFLeafNode* N1AllocationNode = - (AllocationNodes.size() == 1) ? cast<DFLeafNode>(AllocationNodes[0]): NULL; - AllocationNodes.clear(); - ComputeNodes.clear(); - getChildNodeSplit(cast<DFInternalNode>(n2), AllocationNodes, ComputeNodes); - DFLeafNode* N2ComputeNode = cast<DFLeafNode>(ComputeNodes[0]); - DFLeafNode* N2AllocationNode = - (AllocationNodes.size() == 1) ? cast<DFLeafNode>(AllocationNodes[0]): NULL; - - Function* Falloc = NULL; - if (N1AllocationNode && N2AllocationNode) - Falloc = createLeafDFNodeFunction(N1AllocationNode, - N2AllocationNode, - M, 0, 0, 0); - else if (N1AllocationNode) - Falloc = N1AllocationNode->getFuncPointer(); - else if (N2AllocationNode) - Falloc = N2AllocationNode->getFuncPointer(); - - unsigned numOfN1AllocArgs = 0; - unsigned posOfN1AllocArgs = 0; - unsigned numOfN2AllocArgs = 0; - if (N1AllocationNode) { - StructType* F1RetTy = - cast<StructType>(N1AllocationNode->getFuncPointer()->getReturnType()); - numOfN1AllocArgs = F1RetTy->getNumElements(); - // The position where the allocation node's arguments of n1 alloc go in - // the merged function's parameter list is the same as it was in n1 - // compute function, because all the incoming edges to n1 do not change. - // We need this information to shift the allocation parameters to the - // end of the merged function's parameter list - posOfN1AllocArgs = - N1AllocationNode->getOutDFEdgeAt(0)->getDestPosition(); - } - if (N2AllocationNode) { - StructType* F2RetTy = - cast<StructType>(N2AllocationNode->getFuncPointer()->getReturnType()); - numOfN2AllocArgs = F2RetTy->getNumElements(); - } - - errs () << "Working on leaf functions ...\n"; - Function* Fcompute = - createLeafDFNodeFunction(N1ComputeNode, - N2ComputeNode, - M, numOfN1AllocArgs, - posOfN1AllocArgs, numOfN2AllocArgs); - addHint(Fcompute, getPreferredTarget(N1ComputeNode->getFuncPointer())); - removeHint(N1ComputeNode->getFuncPointer(), - getPreferredTarget(N1ComputeNode->getFuncPointer())); - removeHint(N2ComputeNode->getFuncPointer(), - getPreferredTarget(N2ComputeNode->getFuncPointer())); - - errs () << "Leaf functions merged ...\n"; - Fm = createInternalDFNodeFunction(n1, N1AllocationNode, N1ComputeNode, - n2, N2AllocationNode, N2ComputeNode, - Falloc, Fcompute, - M, numOfN1AllocArgs, - posOfN1AllocArgs, numOfN2AllocArgs); - addHint(Fm, getPreferredTarget(n1->getFuncPointer())); - removeHint(n1->getFuncPointer(), getPreferredTarget(n1->getFuncPointer())); - removeHint(n2->getFuncPointer(), getPreferredTarget(n2->getFuncPointer())); - } - errs () << "Leaf functions merged and Internal Function merged ...\n"; - // This is before any code generation passes -> no genfunc - - // FIX PARENT DFNode'S FUNCTION - DFInternalNode* ParentNode = n1->getParent(); - - // Find createNode intrinsics for initial nodes - IntrinsicInst* II1 = n1->getInstruction(); - IntrinsicInst* II2 = n2->getInstruction(); - - // Generate createNode Intrinsic for new node and insert it - IntrinsicInst* CreateNodeII = - createIdenticalCreateNodeWithDifferentFunction(Fm, II1); - - // It needs to be inserted before either of the two. - // Find which one is first and add the new intrinsic before it - IntrinsicInst* IIfirst = NULL; - for (inst_iterator ib = inst_begin(ParentNode->getFuncPointer()), - ie = inst_end(ParentNode->getFuncPointer()); - (ib != ie) && !IIfirst ; ++ib) { - Instruction* I = &*ib; // Grab pointer to Instruction - if ((I == II1) || (I == II2)) { - IIfirst = cast<IntrinsicInst>(I); - } - } - CreateNodeII->insertBefore(IIfirst); - -/* The following is an alternative to using the BuildDFG interface. It only * - * creates this single node, not cnotinuing with the graph contained, thus * - * will not build the graph of the node if it is internal node. Instead, I * - * use the call DFG.handleCreateNode */ - -/* -// -------------------------------------------------------------------------- // -// Updating the graph directly - // Create the new node and add it to the graph - DFLeafNode* mergeDFNode = DFLeafNode::Create(CreateNodeII, Fm, - n1->getTargetHint(), - ParentNode, - n1->getNumOfDim(), - n1->getDimLimits()); - //Done Later: fix rank of mergeDFNode and successors, after edges are fixed - // mergeDFNode->setRank((n1->getRank() > n2->getRank()) ? - // (n1->getRank()) : (n2->getRank()) ); - - ParentNode->addChildToDFGraph(mergeDFNode); -// -------------------------------------------------------------------------- // -*/ - -// -------------------------------------------------------------------------- // -// Updating the BuildDFG result -// remove the two nodes from mapping, add the new one - errs () << "Updating intrinsics\n"; - DFG.removeElementFromHandleToDFNodeMap(II1); - DFG.removeElementFromHandleToDFNodeMap(II2); -// DFG.addElementToHandleToDFNodeMap(CreateNodeII, mergeDFNode); - DFG.handleCreateNode(ParentNode, CreateNodeII); - DFNode* mergeDFNode = DFG.getHandleToDFNodeMap()[CreateNodeII]; - -// -------------------------------------------------------------------------- // - - // Need to update every use of the createNode in the parent node function - // -- that would be in create edge and bind - std::map<unsigned, unsigned> N1InMap; - std::map<unsigned, unsigned> N1OutMap; - std::map<unsigned, unsigned> N2InMap; - std::map<unsigned, unsigned> N2OutMap; - // These maps map the old location of an argument/output (to its function's - // parameter list/out struct) to the new, after edges removed and functions - // merged - buildInputAndOutputMaps(n1, n2, N1InMap, N1OutMap, N2InMap, N2OutMap); - - // Edges from n1 to n2 need to be deleted. - // They are placed here for deletion at the end. - std::vector<DFEdge*> DFEdgestoRemove; - - // Update uses of createNode - that would be createEdge and bind intrinsics - - // to use the new createNode intrinsic - updateUsesOfCreateNodeInParent(II1, II2, CreateNodeII, N1InMap, N1OutMap, - DFEdgestoRemove, DFG); - updateUsesOfCreateNodeInParent(II2, II1, CreateNodeII, N2InMap, N2OutMap, - DFEdgestoRemove, DFG); - - // Both II1 and II2 have no uses left. It is safe to remove them. - errs() << "Erasing: " << *II1 << "\n"; - II1->eraseFromParent(); - errs() << "Erasing: " << *II2 << "\n"; - II2->eraseFromParent(); - -// -------------------------------------------------------------------------- // -// Updating the graph directly - - // Update - // - dataflow edges - // - successor lists - // - incoming and outgoing edge lists - // The edges are updated directly, therefore in the DFGraph DFEdgeList as well - - // For n1 - for (DFNode::indfedge_iterator indfedgeI = n1->indfedge_begin(), - indfedgeE = n1->indfedge_end(); indfedgeI != indfedgeE; indfedgeI++) { - DFEdge* E = *indfedgeI; - // Incoming edges are retargeted to new node in graph - E->setDestDF(mergeDFNode); - // Incoming edges are added to the incoming edge list - // ( no need to add them in the outgoing edge list of source nodes, - // they are already there ) - mergeDFNode->addInDFEdge(E); - // Merge node is added to the successor list of the sources of the edges - E->getSourceDF()->addSuccessor(mergeDFNode); - } - - for (DFNode::outdfedge_iterator outdfedgeI = n1->outdfedge_begin(), - outdfedgeE = n1->outdfedge_end(); outdfedgeI != outdfedgeE; outdfedgeI++) { - DFEdge* E = *outdfedgeI; - // Outgoing edges to n2 are deleted - if (E->getDestDF() == n2) { - ParentNode->getChildGraph()->deleteEdge(E); - continue; - } - - // Outgoing edges are retargeted to start from the new node in graph - E->setSourceDF(mergeDFNode); - // Outgoing edges' source port is updated - E->setSourcePosition(N1OutMap[E->getSourcePosition()]); - // Outgoing edges are added to the outgoing edge list - // ( no need to add them in the incoming edge list of destination nodes, - // they are already there ) - mergeDFNode->addOutDFEdge(E); - // The destination node is added to the successor list of merge node - mergeDFNode->addSuccessor(E->getDestDF()); - } - - // For n2 - for (DFNode::indfedge_iterator indfedgeI = n2->indfedge_begin(), - indfedgeE = n2->indfedge_end(); indfedgeI != indfedgeE; indfedgeI++) { - DFEdge* E = *indfedgeI; - // Incoming edges from n1 have already been removed from the graph - ignore - if (E->getSourceDF() == n1) { - DEBUG(errs() << "Edges between n1-n2 have already been removed from graph\n"); - } - - // Incoming edges are retargeted to new node in graph - E->setDestDF(mergeDFNode); - // Incoming edges' destination port is updated - E->setDestPosition(N2InMap[E->getDestPosition()]); - // Incoming edges are added to the incoming edge list - // ( no need to add them in the outgoing edge list of source nodes, - // they are already there ) - mergeDFNode->addInDFEdge(E); - // Merge node is added to the successor list of the sources of the edges - E->getSourceDF()->addSuccessor(mergeDFNode); - } - - for (DFNode::outdfedge_iterator outdfedgeI = n2->outdfedge_begin(), - outdfedgeE = n2->outdfedge_end(); outdfedgeI != outdfedgeE; outdfedgeI++) { - DFEdge* E = *outdfedgeI; - // Outgoing edges are retargeted to start from the new node in graph - E->setSourceDF(mergeDFNode); - // Outgoing edges' source port is updated - E->setSourcePosition(N2OutMap[E->getSourcePosition()]); - // Outgoing edges are added to the outgoing edge list - // ( no need to add them in the incoming edge list of destination nodes, - // they are already there ) - mergeDFNode->addOutDFEdge(E); - // The destination node is added to the successor list of merge node - mergeDFNode->addSuccessor(E->getDestDF()); - } - - -// -------------------------------------------------------------------------- // - - -// -------------------------------------------------------------------------- // -// Updating the graph directly - - // Compute rank of mergeDFNode and update rank of successors - mergeDFNode->setRank((n1->getRank() > n2->getRank()) ? - (n1->getRank()) : (n2->getRank()) ); - - // Clear their incoming and outgoing edges vectors, and the successors list - n1->clearGraphElements(); - n2->clearGraphElements(); - - // Clear them from the parent graph - ParentNode->removeChildFromDFGraph(n1); - ParentNode->removeChildFromDFGraph(n2); - - /* - delete n1; - delete n2; - for (unsigned i = 0 ; i < DFEdgestoRemove.size(); i++) - delete DFEdgestoRemove[i]; -*/ - -// -------------------------------------------------------------------------- // - errs() << "Removing similar arguments\n"; - if (dyn_cast<DFLeafNode>(n1)) { - removeUnnecessaryInputEdges(mergeDFNode, n1, 0, 0); - // Erase old functions from module - n1->getFuncPointer()->replaceAllUsesWith(UndefValue::get(n1->getFuncPointer()->getType())); - n1->getFuncPointer()->eraseFromParent(); - n2->getFuncPointer()->replaceAllUsesWith(UndefValue::get(n2->getFuncPointer()->getType())); - n2->getFuncPointer()->eraseFromParent(); - - } else { - std::vector<DFNode*> AllocationNodes; - std::vector<DFNode*> ComputeNodes; - - // Get components of n1 - getChildNodeSplit(cast<DFInternalNode>(n1), AllocationNodes, ComputeNodes); - DFLeafNode* N1ComputeNode = cast<DFLeafNode>(ComputeNodes[0]); - DFLeafNode* N1AllocationNode = - (AllocationNodes.size() == 1) ? cast<DFLeafNode>(AllocationNodes[0]): NULL; - - AllocationNodes.clear(); - ComputeNodes.clear(); - - // Get components of n2 - getChildNodeSplit(cast<DFInternalNode>(n2), AllocationNodes, ComputeNodes); - DFLeafNode* N2AllocationNode = - (AllocationNodes.size() == 1) ? cast<DFLeafNode>(AllocationNodes[0]): NULL; - DFLeafNode* N2ComputeNode = cast<DFLeafNode>(ComputeNodes[0]); - - AllocationNodes.clear(); - ComputeNodes.clear(); - - // Get components of mergeDFNode - getChildNodeSplit(cast<DFInternalNode>(mergeDFNode), AllocationNodes, - ComputeNodes); - DFLeafNode* ComputeNode = cast<DFLeafNode>(ComputeNodes[0]); - - unsigned numOfN1AllocArgs = 0; - unsigned numOfN2AllocArgs = 0; - if (N1AllocationNode) { - StructType* F1RetTy = - cast<StructType>(N1AllocationNode->getFuncPointer()->getReturnType()); - numOfN1AllocArgs = F1RetTy->getNumElements(); - } - if (N2AllocationNode) { - StructType* F2RetTy = - cast<StructType>(N2AllocationNode->getFuncPointer()->getReturnType()); - numOfN2AllocArgs = F2RetTy->getNumElements(); - } - - errs() << "Removing unnecessary input arguments\n"; - removeUnnecessaryInputEdges(ComputeNode, N1ComputeNode, numOfN1AllocArgs, - numOfN2AllocArgs); - - N1ComputeNode->getFuncPointer()->replaceAllUsesWith(UndefValue::get(N1ComputeNode->getFuncPointer()->getType())); - N1ComputeNode->getFuncPointer()->eraseFromParent(); - N2ComputeNode->getFuncPointer()->replaceAllUsesWith(UndefValue::get(N2ComputeNode->getFuncPointer()->getType())); - N2ComputeNode->getFuncPointer()->eraseFromParent(); - } - - errs() << "Deleting internal nodes\n"; - - deleteInternalNodeFunction(n1, DFG); - deleteInternalNodeFunction(n2, DFG); - - errs() << "Returning\n"; - return; -} - -bool MergeDFN::runOnModule(Module &M) { - errs() << "\nMergeDFN PASS\n"; - - // Get the BuildDFG Analysis Results: - // - Dataflow graph - // - Maps from i8* handles to DFNode and DFEdge - BuildDFG &DFG = getAnalysis<BuildDFG>(); - - // DFInternalNode *Root = DFG.getRoot(); - std::vector<DFInternalNode*> Roots = DFG.getRoots(); - - // Visitor for Code Generation Graph Traversal - MergeTraversal *MergeLookup = new MergeTraversal(M, DFG); - - // Iterate over all the DFGs and produce code for each one of them - for (auto rootNode: Roots) { - // Initiate code generation for root DFNode - MergeLookup->visit(rootNode); - } - - if (MergeLookup->isValidMergeChoise()) { - errs() << "Valid Merge Choise. Begin merging..\n"; - DEBUG(errs() << "Valid Merge Choise. Begin merging..\n"); - MergeLookup->mergeDFN(); - } else { - errs() << "Not Valid Merge Choise. Abort merging.\n"; - DEBUG(errs() << "Not Valid Merge Choise. Abort merging.\n"); - } - - delete MergeLookup; - - return true; -} - -/****************************************************************************** - * Helper functions * - ******************************************************************************/ - -// Creates a new createNode intrinsic, similar to II but with different -// associated function F instead -IntrinsicInst* createIdenticalCreateNodeWithDifferentFunction(Function* F, - IntrinsicInst* II) { - Module* M = F->getParent(); - - // Find which createNode intrinsic we need to create - Function* CreateNodeF = Intrinsic::getDeclaration(M, II->getIntrinsicID()); - Constant* Fp = ConstantExpr::getPointerCast(F, - Type::getInt8PtrTy(II->getContext())); - - ArrayRef<Value*> CreateNodeArgs; - switch (II->getIntrinsicID()) { - case Intrinsic::visc_createNode: - { - CreateNodeArgs = ArrayRef<Value*>(Fp); - break; - } - case Intrinsic::visc_createNode1D: - { - Value* CreateNode1DArgs[] = {Fp, II->getArgOperand(1)}; - CreateNodeArgs = ArrayRef<Value*>(CreateNode1DArgs, 2); - break; - } - case Intrinsic::visc_createNode2D: - { - Value* CreateNode2DArgs[] = {Fp, II->getArgOperand(1), - II->getArgOperand(2)}; - CreateNodeArgs = ArrayRef<Value*>(CreateNode2DArgs, 3); - break; - } - case Intrinsic::visc_createNode3D: - { - Value* CreateNode3DArgs[] = {Fp, II->getArgOperand(1), - II->getArgOperand(2), - II->getArgOperand(3)}; - CreateNodeArgs = ArrayRef<Value*>(CreateNode3DArgs, 4); - break; - } - default : - assert(false && "Unknown createNode intrinsic"); - break; - } - - CallInst* CI = CallInst::Create(CreateNodeF, - CreateNodeArgs, - F->getName()+".node"); - IntrinsicInst* CreateNodeII = cast<IntrinsicInst>(CI); - return CreateNodeII; -} - -// Creates a new createNode intrinsic based on II. -// The new intrinsic has different associated function F instead. II is used to -// determine the location (in the parameter list of function Fargs) where the -// arguments of the new intrinsic can be found. -IntrinsicInst* createNewCreateNodeBasedOn(Function* F, IntrinsicInst* II, - Function* Fargs) { - Module* M = F->getParent(); - - // Find which createNode intrinsic we need to create - Function* CreateNodeF = Intrinsic::getDeclaration(M, II->getIntrinsicID()); - Constant* Fp = ConstantExpr::getPointerCast(F, - Type::getInt8PtrTy(II->getContext())); - - std::vector<Argument*> FArgList; - for (auto& arg: Fargs->getArgumentList()) { - FArgList.push_back(&arg); - } - - ArrayRef<Value*> CreateNodeArgs; - switch (II->getIntrinsicID()) { - case Intrinsic::visc_createNode: - { - CreateNodeArgs = ArrayRef<Value*>(Fp); - break; - } - case Intrinsic::visc_createNode1D: - { - Value* CreateNode1DArgs[] = {Fp, - FArgList[cast<Argument>(II->getArgOperand(1))->getArgNo()]}; - CreateNodeArgs = ArrayRef<Value*>(CreateNode1DArgs, 2); - break; - } - case Intrinsic::visc_createNode2D: - { - Value* CreateNode2DArgs[] = {Fp, - FArgList[cast<Argument>(II->getArgOperand(1))->getArgNo()], - FArgList[cast<Argument>(II->getArgOperand(2))->getArgNo()]}; - CreateNodeArgs = ArrayRef<Value*>(CreateNode2DArgs, 3); - break; - } - case Intrinsic::visc_createNode3D: - { - Value* CreateNode3DArgs[] = {Fp, - FArgList[cast<Argument>(II->getArgOperand(1))->getArgNo()], - FArgList[cast<Argument>(II->getArgOperand(2))->getArgNo()], - FArgList[cast<Argument>(II->getArgOperand(3))->getArgNo()]}; - CreateNodeArgs = ArrayRef<Value*>(CreateNode3DArgs, 4); - break; - } - default : - assert(false && "Unknown createNode intrinsic"); - break; - } - - CallInst* CI = CallInst::Create(CreateNodeF, - CreateNodeArgs, - F->getName()+".node"); - IntrinsicInst* CreateNodeII = cast<IntrinsicInst>(CI); - return CreateNodeII; -} - - -// create an identical createEdge with different src (true) or dst (false) node -IntrinsicInst* createIdenticalCreateEdgeWithDifferentNode(IntrinsicInst* II, -IntrinsicInst* IInode, bool srcnode) { - // Argument of the function to be called - Value* SrcNode = (srcnode) ? IInode: II->getArgOperand(0); - Value* DstNode = (srcnode) ? II->getArgOperand(1): IInode; - - Value* EdgeArgs[] = {SrcNode, DstNode, - II->getArgOperand(2), - II->getArgOperand(3), - II->getArgOperand(4), - II->getArgOperand(5) - }; - -// Function* EdgeF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createEdge); - Function* EdgeF = II->getCalledFunction(); - CallInst* EdgeInst = CallInst::Create(EdgeF, - ArrayRef<Value*>(EdgeArgs, 6), - II->getName()+".repl"); - IntrinsicInst* newII = dyn_cast<IntrinsicInst>(EdgeInst); - assert(newII && "Cannot cast createEdge to IntrinsicInst"); - - return newII; -} - -// create an identical createEdge with different src (true) or dst (false) port -IntrinsicInst* createIdenticalCreateEdgeWithDifferentPort(IntrinsicInst* II, -unsigned port, bool srcport) { - // Argument of the function to be called - ConstantInt* PortConstant = - ConstantInt::get(Type::getInt32Ty(II->getContext()), port); - Value* SrcPort = (srcport) ? PortConstant: II->getArgOperand(3); - Value* DstPort = (srcport) ? II->getArgOperand(4): PortConstant; - - Value* EdgeArgs[] = {II->getArgOperand(0), - II->getArgOperand(1), - II->getArgOperand(2), - SrcPort, DstPort, - II->getArgOperand(5) - }; - -// Function* EdgeF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createEdge); - Function* EdgeF = II->getCalledFunction(); - CallInst* EdgeInst = CallInst::Create(EdgeF, - ArrayRef<Value*>(EdgeArgs, 6), - II->getName()+".repl"); - IntrinsicInst* newII = dyn_cast<IntrinsicInst>(EdgeInst); - assert(newII && "Cannot cast createEdge to IntrinsicInst"); - - return newII; -} - -// create an identical bindInput with different destination node -IntrinsicInst* createIdenticalBindInputWithDifferentNode(IntrinsicInst* II, - IntrinsicInst* IInode) { - Value* BindArgs[] = {IInode, - II->getArgOperand(1), - II->getArgOperand(2), - II->getArgOperand(3) - }; -// Function* BindF = Intrinsic::getDeclaration(&M, Intrinsic::visc_bind_input); - Function* BindF = II->getCalledFunction(); - CallInst* BindInst = CallInst::Create(BindF, - ArrayRef<Value*>(BindArgs, 4), - ""); - IntrinsicInst* newII = dyn_cast<IntrinsicInst>(BindInst); - assert(newII && "Cannot cast bind_output to IntrinsicInst"); - - return newII; -} - -// create an identical bindInput with different src (true) or dst (false) port -IntrinsicInst* createIdenticalBindInputWithDifferentPort(IntrinsicInst* II, - unsigned port, - bool srcport) { - // Argument of the function to be called - ConstantInt* PortConstant = - ConstantInt::get(Type::getInt32Ty(II->getContext()), port); - Value* SrcPort = (srcport) ? PortConstant: II->getArgOperand(1); - Value* DstPort = (srcport) ? II->getArgOperand(2): PortConstant; - - Value* BindArgs[] = {II->getArgOperand(0), - SrcPort, - DstPort, - II->getArgOperand(3) - }; -// Function* BindF = Intrinsic::getDeclaration(&M, Intrinsic::visc_bind_input); - Function* BindF = II->getCalledFunction(); - CallInst* BindInst = CallInst::Create(BindF, - ArrayRef<Value*>(BindArgs, 4), - ""); - IntrinsicInst* newII = dyn_cast<IntrinsicInst>(BindInst); - assert(newII && "Cannot cast bind_output to IntrinsicInst"); - - return newII; -} - -// create an identical bindOutput with different source node -IntrinsicInst* createIdenticalBindOutputWithDifferentNode(IntrinsicInst* II, - IntrinsicInst* IInode) { - Value* BindArgs[] = {IInode, - II->getArgOperand(1), - II->getArgOperand(2), - II->getArgOperand(3) - }; -// Function* BindF = Intrinsic::getDeclaration(&M, Intrinsic::visc_bind_output); - Function* BindF = II->getCalledFunction(); - CallInst* BindInst = CallInst::Create(BindF, - ArrayRef<Value*>(BindArgs, 4), - ""); - IntrinsicInst* newII = dyn_cast<IntrinsicInst>(BindInst); - assert(newII && "Cannot cast bind_output to IntrinsicInst"); - - return newII; -} - -// create an identical bindOutput with different src (true) or dst (false) port -IntrinsicInst* createIdenticalBindOutputWithDifferentPort(IntrinsicInst* II, - unsigned port, - bool srcport) { - // Argument of the function to be called - ConstantInt* PortConstant = - ConstantInt::get(Type::getInt32Ty(II->getContext()), port); - Value* SrcPort = (srcport) ? PortConstant: II->getArgOperand(1); - Value* DstPort = (srcport) ? II->getArgOperand(2): PortConstant; - - Value* BindArgs[] = {II->getArgOperand(0), - SrcPort, - DstPort, - II->getArgOperand(3) - }; -// Function* BindF = Intrinsic::getDeclaration(&M, Intrinsic::visc_bind_output); - Function* BindF = II->getCalledFunction(); - CallInst* BindInst = CallInst::Create(BindF, - ArrayRef<Value*>(BindArgs, 4), - ""); - IntrinsicInst* newII = dyn_cast<IntrinsicInst>(BindInst); - assert(newII && "Cannot cast bind_output to IntrinsicInst"); - - return newII; -} - -// Function to find each use of a createNode intrinsic for a node existing -// before merging and properly replace it with a use of the createNode for -// node created after node merging -// II1 is the createNode for the node that got merged, whose uses we want to replace -// II2 is the createNode for the other node that got merged -// (we need this to determine if en edge should be updated or deleted) -// InMap and Outmap maps map the old location of an argument/output to the new -// one, after edges removed and functions merged -// CreateEdge for edges from n1 to n2 need to be deleted and associated -// intrinsics to be removed. They are placed in the two vectors. -void updateUsesOfCreateNodeInParent(IntrinsicInst* II1, - IntrinsicInst* II2, - IntrinsicInst* IInew, - std::map<unsigned, unsigned> InMap, - std::map<unsigned, unsigned> OutMap, - std::vector<DFEdge*> &DFEdgestoRemove, - BuildDFG &DFG) { - std::vector<IntrinsicInst*> IItoRemove; - - for (Value::user_iterator i = II1->user_begin(), ie = II1->user_end(); - i != ie; ++i) { - Instruction *VI = dyn_cast<Instruction>(*i); - IntrinsicInst* II = dyn_cast<IntrinsicInst>(VI); - assert(II && "Use of a node handle outside of a visc intrinsic"); - - switch(II->getIntrinsicID()) { - case Intrinsic::visc_createEdge: - { - if (isOutgoingEdgeIntrinsic(II,II1)) { // check for outgoing edges - if (isIncomingEdgeIntrinsic(II,II2)) { - // edge is between merged nodes - // createEdge is marked for deletion, if not already there - if (std::find(IItoRemove.begin(),IItoRemove.end(),II) == IItoRemove.end()) { - IItoRemove.push_back(II); - // ------------------------------------------------------------ // - // Updating the BuildDFG result - // remove handle for non-existing edge in mapping - DFEdge* EdgeInMapping = DFG.getHandleToDFEdgeMap()[II]; - DFG.removeElementFromHandleToDFEdgeMap(II); - DFEdgestoRemove.push_back(EdgeInMapping); - // ------------------------------------------------------------ // - } - } else { // Edge is outgoing, but to another node in the graph - // We need to change Src and SrcPort - // create an identical createEdge with different srcport - unsigned srcPos = cast<ConstantInt>(II->getOperand(3))->getZExtValue(); - IntrinsicInst* newII = - createIdenticalCreateEdgeWithDifferentPort(II, - OutMap[srcPos], - true); - // and insert it before the current create edge - newII->insertBefore(II); - // change of operand II1 will happen at the end with replaceAllUsesWith - // mark this createEdge for deletion - IItoRemove.push_back(II); - // -------------------------------------------------------------- // - // Updating the BuildDFG result - // replace handle for edge in mapping - DFEdge* EdgeInMapping = DFG.getHandleToDFEdgeMap()[II]; - DFG.removeElementFromHandleToDFEdgeMap(II); - DFG.addElementToHandleToDFEdgeMap(newII, EdgeInMapping); - // -------------------------------------------------------------- // - } - } else { // isIncomingEdgeIntrinsic(II,II1) : check for incoming edges - if (isOutgoingEdgeIntrinsic(II,II2)) { - // edge is between merged nodes - // createEdge is marked for deletion, if not already there - if (std::find(IItoRemove.begin(),IItoRemove.end(),II) == IItoRemove.end()) { - IItoRemove.push_back(II); - // ------------------------------------------------------------ // - // Updating the BuildDFG result - // remove handle for non-existing edge in mapping - DFEdge* EdgeInMapping = DFG.getHandleToDFEdgeMap()[II]; - DFG.removeElementFromHandleToDFEdgeMap(II); - DFEdgestoRemove.push_back(EdgeInMapping); - // ------------------------------------------------------------ // - } - } else { // Edge is incoming, but from another node - // We need to change Dst node and DstPort - // create an identical createEdge with different dstport - unsigned dstPos = cast<ConstantInt>(II->getOperand(4))->getZExtValue(); - IntrinsicInst* newII = - createIdenticalCreateEdgeWithDifferentPort(II, - InMap[dstPos], - false); - // and insert it before the current create edge - newII->insertBefore(II); - // change of operand II1 will happen at the end with replaceAllUsesWith - // mark this createEdge for deletion - IItoRemove.push_back(II); - // -------------------------------------------------------------- // - // Updating the BuildDFG result - // replace handle for edge in mapping - DFEdge* EdgeInMapping = DFG.getHandleToDFEdgeMap()[II]; - DFG.removeElementFromHandleToDFEdgeMap(II); - DFG.addElementToHandleToDFEdgeMap(newII, EdgeInMapping); - // -------------------------------------------------------------- // - } - } - } - break; - case Intrinsic::visc_bind_input: - { - // incoming bind from parent node - // We need to change Dst node and DstPort - // create an identical bindInput with different dstport - unsigned dstPos = cast<ConstantInt>(II->getOperand(2))->getZExtValue(); - IntrinsicInst* newII = - createIdenticalBindInputWithDifferentPort(II, InMap[dstPos], false); - // and insert it before the current bind - newII->insertBefore(II); - // change of operand II1 will happen at the end with replaceAllUsesWith - // mark this bind for deletion - IItoRemove.push_back(II); - // ------------------------------------------------------------------ // - // Updating the BuildDFG result - // replace handle for edge in mapping - DFEdge* EdgeInMapping = DFG.getHandleToDFEdgeMap()[II]; - DFG.removeElementFromHandleToDFEdgeMap(II); - DFG.addElementToHandleToDFEdgeMap(newII, EdgeInMapping); - // ------------------------------------------------------------------ // - } - break; - case Intrinsic::visc_bind_output: - { - // outgoing bind to parent node - // We need to change Src node and SrcPort - // create an identical bindOutput with different srcport - unsigned srcPos = cast<ConstantInt>(II->getOperand(1))->getZExtValue(); - IntrinsicInst* newII = - createIdenticalBindOutputWithDifferentPort(II, OutMap[srcPos], true); - // and insert it before the current bind - newII->insertBefore(II); - // change of operand II1 will happen at the end with replaceAllUsesWith - // mark this bind for deletion - IItoRemove.push_back(II); - // ------------------------------------------------------------------ // - // Updating the BuildDFG result - // replace handle for edge in mapping - DFEdge* EdgeInMapping = DFG.getHandleToDFEdgeMap()[II]; - DFG.removeElementFromHandleToDFEdgeMap(II); - DFG.addElementToHandleToDFEdgeMap(newII, EdgeInMapping); - // ------------------------------------------------------------------ // - } - break; - default : - assert(false && "Unknown use of node handle"); - break; - } - } - - // Delete gathered instructions - for (std::vector<IntrinsicInst *>::iterator ib = IItoRemove.begin(), - ie = IItoRemove.end(); ib != ie; ++ib) { - DEBUG(errs() << "Erasing: " << **ib << "\n"); - (*ib)->eraseFromParent(); - } - - // Change all remaining edge-bind intrinsics containing n1 to the new node - II1->replaceAllUsesWith(IInew); - -} - -// Query the king of edge described by a createEdge intrinsic -// with respect to node handle IIn -bool isIncomingEdgeIntrinsic(IntrinsicInst* IIe, IntrinsicInst* IIn) { - Value* Src = IIe->getArgOperand(1); - IntrinsicInst* ArgII = cast<IntrinsicInst>(Src); -// IntrinsicInst* ArgII = cast<IntrinsicInst>(Src->stripPointerCasts()); - assert(ArgII && "First argument of createEdge is not an intrinsic"); - return (ArgII == IIn); -} - -bool isOutgoingEdgeIntrinsic(IntrinsicInst* IIe, IntrinsicInst* IIn) { - Value* Src = IIe->getArgOperand(0); - IntrinsicInst* ArgII = cast<IntrinsicInst>(Src); -// IntrinsicInst* ArgII = cast<IntrinsicInst>(Src->stripPointerCasts()); - assert(ArgII && "First argument of createEdge is not an intrinsic"); - return (ArgII == IIn); -} - -/* - * Return true if n2 is a successor of n1 - */ -bool hasSuccessor(DFNode* N1, DFNode* N2) { - for (DFNode::const_successor_iterator i = N1->successors_begin(), - e = N1->successors_end(); - i != e; i++) { - DFNode* N = *i; - if ((N == N2) || (hasSuccessor(N,N1))) return true; - } - return false; -} - -/* - * Return true if n2 is an immediate successor of n1 - */ -bool hasImmediateSuccesssor(DFNode* N1, DFNode* N2) { - for (DFNode::const_successor_iterator i = N1->successors_begin(), - e = N1->successors_end(); - i != e; i++) { - DFNode* N = *i; - if (N == N2) return true; - } - return false; -} - -/* - * Return true if all edges between n1 and n2 are one-to-one - */ -bool checkEdgesType(DFNode* N1, DFNode* N2) { - for (DFNode::const_outdfedge_iterator i = N1->outdfedge_begin(), - e = N1->outdfedge_end(); - i != e; i++) { - DFEdge* E = *i; - if ((E->getDestDF() == N2) && (E->getEdgeType())) return false; - } - return true; -} - -// Construct argument list -// Assuming that N2 cannot be an ansestor of N1 -static void createArgTypes(DFNode* N1, DFNode* N2, std::vector<Type*> &ArgTypes) { - Function* F1 = N1->getFuncPointer(); - Function* F2 = N2->getFuncPointer(); - - for(auto& arg: F1->getArgumentList()) { - DEBUG(errs() << arg << "\n"); - ArgTypes.push_back(arg.getType()); - } - - unsigned inport = 0; - for(auto& arg: F2->getArgumentList()) { - DEBUG(errs() << arg << "\n"); - if (N2->getExtendedInDFEdgeAt(inport)->getSourceDF() != N1) - ArgTypes.push_back(arg.getType()); - inport++; - } - -} - -// Returns the allocation nodes and the compute nodes of a parent dataflow node -void getChildNodeSplit(DFInternalNode* N, - std::vector<DFNode*> &AllocationNodes, - std::vector<DFNode*> &ComputeNodes) { - DFGraph::const_children_iterator ci = N->getChildGraph()->begin(); - DFGraph::const_children_iterator ce = N->getChildGraph()->end(); - - for ( ; ci != ce; ci++ ) { - DFNode* child = *ci; - if (child->isAllocationNode()) - AllocationNodes.push_back(child); - else if (!child->isDummyNode()) - ComputeNodes.push_back(child); - } - -} - -// Creates a map between the old locations of parameters and outputs in the -// functions before merging, and the new one after merge. Those that correspond -// to edges that no longer exist (between the merged nodes) are not in the maps. -void buildInputAndOutputMaps(DFNode* N1, DFNode* N2, - std::map<unsigned, unsigned> &N1InMap, - std::map<unsigned, unsigned> &N1OutMap, - std::map<unsigned, unsigned> &N2InMap, - std::map<unsigned, unsigned> &N2OutMap) { - unsigned n1NumInputs = 0; - for (unsigned i = 0; i < N1->getFuncPointer()->getArgumentList().size(); - i++, n1NumInputs++) { - N1InMap[i] = i; - } - for (unsigned i = 0, inpos = 0; - i < N2->getFuncPointer()->getArgumentList().size(); i++) { - if (N2->getExtendedInDFEdgeAt(i)->getSourceDF() != N1) { - N2InMap[i] = inpos+n1NumInputs; - inpos++; - } - } - - unsigned n1NumOutputs = 0; - StructType* F1RetTy = cast<StructType>(N1->getFuncPointer()->getReturnType()); - for (unsigned i = 0; i < F1RetTy->getNumElements(); i++) { - if (N1->getExtendedOutDFEdgeAt(i)->getDestDF() != N2) { - N1OutMap[i] = n1NumOutputs; - n1NumOutputs++; - } - } - - StructType* F2RetTy = cast<StructType>(N2->getFuncPointer()->getReturnType()); - for (unsigned i = 0; i < F2RetTy->getNumElements(); i++) { - N2OutMap[i] = i+n1NumOutputs; - } - - return; -} - -// Creates a map between the old edge ports in the -// nodes before merging, and the new one after merge. Those that correspond -// to edges that no longer exist (between the merged nodes) are not in the maps. -void buildInAndOutEdgeMaps(DFNode* N1, DFNode* N2, - std::map<unsigned, unsigned> &N1InMap, - std::map<unsigned, unsigned> &N1OutMap, - std::map<unsigned, unsigned> &N2InMap, - std::map<unsigned, unsigned> &N2OutMap) { - - unsigned n1NumInEdges = N1->getFuncPointer()->getArgumentList().size(); - for (unsigned i = 0; i < n1NumInEdges; i++) { - N1InMap[i] = i; - } - - unsigned n1NumOutEdges = 0; - StructType* F1RetTy = cast<StructType>(N1->getFuncPointer()->getReturnType()); - for (unsigned i = 0; i < F1RetTy->getNumElements(); i++) { - if (N1->getExtendedOutDFEdgeAt(i)->getDestDF() != N2) { - N1OutMap[i] = n1NumOutEdges; - n1NumOutEdges++; - } - } - - unsigned n2NumInEdges = N2->getFuncPointer()->getArgumentList().size(); - for (unsigned i = 0, inpos = 0; i < n2NumInEdges; i++) { - if (N2->getExtendedInDFEdgeAt(i)->getSourceDF() != N1) { - N2InMap[i] = inpos+n1NumInEdges; - inpos++; - } - } - - StructType* F2RetTy = cast<StructType>(N2->getFuncPointer()->getReturnType()); - for (unsigned i = 0; i < F2RetTy->getNumElements(); i++) { - N2OutMap[i] = i+n1NumOutEdges; - } - - return; -} - -// Construct return type -// Assuming that N2 cannot be an ansestor of N1 -static StructType* createReturnType(DFNode* N1, DFNode* N2) { - Function* F1 = N1->getFuncPointer(); - Function* F2 = N2->getFuncPointer(); - - StructType* F1RetTy = dyn_cast<StructType>(F1->getReturnType()); - assert(F1RetTy && "Return Type must always be a struct"); - StructType* F2RetTy = dyn_cast<StructType>(F2->getReturnType()); - assert(F2RetTy && "Return Type must always be a struct"); - - std::vector<Type*> ReturnTypeElements; - unsigned outPos1 = 0, outPos2 = 0, outPosM = 0; - for (StructType::element_iterator i = F1RetTy->element_begin(), - e = F1RetTy->element_end(); - (i != e) && (outPos1 < F1RetTy->getNumElements()); i++, outPos1++) { - if (N1->getExtendedOutDFEdgeAt(outPos1)->getDestDF() == N2) - continue; - ReturnTypeElements.push_back(*i); - outPosM++; - } - - for (StructType::element_iterator i = F2RetTy->element_begin(), - e = F2RetTy->element_end(); - i != e && outPos2 < F2RetTy->getNumElements(); i++, outPos2++) { - ReturnTypeElements.push_back(*i); - outPosM++; - } - - errs() << "Return elements = " << ReturnTypeElements.size() << "\n"; - StructType* FRetTy = StructType::create(F1->getContext(), - ArrayRef<Type*>(ReturnTypeElements), - (F1->getName()+"."+F2->getName()+".ty").str(), true); - - errs() << "Struct type created\n"; - return FRetTy; -} - -// Copy attributes -// Assuming that N2 cannot be an ansestor of N1 -static void copyAttrList(DFNode* N1, DFNode* N2, Function* F) { - Function* F1 = N1->getFuncPointer(); - Function* F2 = N2->getFuncPointer(); - - Function::arg_iterator f1_ai = F1->arg_begin(), f1_ae = F1->arg_end(); - Function::arg_iterator f2_ai = F2->arg_begin(), f2_ae = F2->arg_end(); - Function::arg_iterator f_ai = F->arg_begin(), f_ae = F->arg_end(); - - unsigned inPos1 = 0, inPos2 = 0, inPosM = 0; - for(; f1_ai != f1_ae && f_ai != f_ae; ++f1_ai, ++f_ai, inPos1++, inPosM++) { - AttributeSet AS = F1->getAttributes(); - DEBUG(errs() << "Copying attributes from " << F1->getName() << " at " << f1_ai->getArgNo() << "\n"); - AttrBuilder AB(AS, f1_ai->getArgNo()+1); - AttributeSet argAS = AttributeSet::get(F1->getContext(), f_ai->getArgNo()+1, AB); - F->addAttributes(f_ai->getArgNo()+1, argAS); - } - for(; f2_ai != f2_ae && f_ai != f_ae; ++f2_ai, inPos2++) { - if (N2->getExtendedInDFEdgeAt(inPos2)->getSourceDF() == N1) - continue; - - AttributeSet AS = F2->getAttributes(); - DEBUG(errs() << "Copying attributes from " << F2->getName() << " at " << f2_ai->getArgNo() << "\n"); - AttrBuilder AB(AS, f2_ai->getArgNo()+1); - AttributeSet argAS = AttributeSet::get(F2->getContext(), f_ai->getArgNo()+1, AB); - F->addAttributes(f_ai->getArgNo()+1, argAS); - ++f_ai; - inPosM++; - } -} - -// Copy argument names -static void copyArgumentNames(DFNode* N1, DFNode* N2, Function* F) { - Function* F1 = N1->getFuncPointer(); - Function* F2 = N2->getFuncPointer(); - - Function::arg_iterator dest_it = F->arg_begin(); - - for(auto& arg: F1->getArgumentList()) { - dest_it->setName("n1_" + arg.getName()); - dest_it++; - } - - unsigned inport = 0; - for(auto& arg: F2->getArgumentList()) { - if (N2->getExtendedInDFEdgeAt(inport)->getSourceDF() != N1) { - dest_it->setName("n2_" + arg.getName()); - dest_it++; - } - inport++; - } -} - -// Creates shift map, which maps old position to new, after shifting num -// arguments starting from fromPos by shift positions to the right. -void createShiftMap(Function* F, unsigned fromPos, unsigned num, - unsigned shift, std::vector<unsigned> &ShiftMap) { - - for (unsigned i = 0; i < F->getArgumentList().size(); i++) - ShiftMap.push_back(i); - - for (unsigned i = fromPos; i < fromPos + num; i++) - ShiftMap[i] += shift; - - for (unsigned i = fromPos + num; i < fromPos + num + shift; i++) - ShiftMap[i] -= num; - -} - -// Shifts num arguments starting from fromPos by shift positions to the right, -// replacing with the arguments at those positions. -// Updates shift map, which maps old position to new. -void shiftArgs(Function* F, unsigned fromPos, unsigned num, - unsigned shift, std::vector<unsigned> &ShiftMap) { - Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); - Function::arg_iterator from = ai; - - unsigned cnt; - for (cnt = 0; from != ae && cnt < fromPos; from++, cnt++) { - } - assert((cnt == fromPos) && "Invalid start position for argument shifting"); - - Function::arg_iterator af = from; - std::vector<Type*> ArgTypes; - std::vector<StringRef> ArgNames; - unsigned argNo = 0; - - //TODO: check if this copies attributes as well - ValueToValueMapTy VMap; - Function* F_copy = CloneFunction(F, VMap); - F_copy->removeFromParent(); - - // Arguments up until before from - for ( ; ai != from && ai != ae; ai++, argNo++) { - ArgTypes.push_back(ai->getType()); - ArgNames.push_back(ai->getName()); - } - - // Arguments to be shifted (num arguments) are skipped for now - for (unsigned i = 0; (i < num) && (ai != ae); i++, ai++, argNo++) { - ShiftMap[argNo] += shift; - } - - // Later arguments (#shift arguments) are pushed until we fill shift positions - for (unsigned i = 0; (ai != ae) && (i < shift); i++, ai++, argNo++) { - ArgTypes.push_back(ai->getType()); - ArgNames.push_back(ai->getName()); - ShiftMap[argNo] -= num; - } - - // Arguments that were to be shifted (num arguments) are now pushed - for (unsigned i = 0; (i < num) && (af != ae); i++, af++, argNo++) { - ArgTypes.push_back(af->getType()); - ArgNames.push_back(af->getName()); - } - - // Remaining arguments are pushed - for (; ai != ae; ai++) { - ArgTypes.push_back(ai->getType()); - ArgNames.push_back(ai->getName()); - } - - // Change function type - FunctionType* FTy = FunctionType::get(F->getReturnType(), ArgTypes, F->isVarArg()); - PointerType* PTy = FTy->getPointerTo(); - F->mutateType(PTy); - - // Shift argument names - ai = F->arg_begin(); - for (unsigned i = 0; ai != ae; ai++, i++) { - (*ai).setName(ArgNames[i]); - } - - // Shift attributes by deleting them from F and copying them from F_copy - - //Initialize required iterators for shift elements: from -> from_copy+shift - af = from; - Function::arg_iterator af_copy; - for (unsigned i = 0; i < af->getArgNo() + shift; i++, af_copy++) { - } - for (unsigned i = 0; i < num; i++, af++, af_copy++) { - AttributeSet ASf = F->getAttributes(); - AttributeSet ASfc = F_copy->getAttributes(); - - AttrBuilder ABfc(ASfc, af_copy->getArgNo()+1); - AttributeSet argASfc = AttributeSet::get(F_copy->getContext(), af->getArgNo()+1, ABfc); - F->removeAttributes(af->getArgNo()+1,ASf.getParamAttributes(af->getArgNo()+1)); - F->addAttributes(af->getArgNo()+1, argASfc); - } - //Initialize required iterators for num elements: to -> to_copy-num - af_copy = from; - for (unsigned i = 0; i < shift; i++, af++, af_copy++) { - AttributeSet ASf = F->getAttributes(); - AttributeSet ASfc = F_copy->getAttributes(); - - AttrBuilder ABfc(ASfc, af_copy->getArgNo()+1); - AttributeSet argASfc = AttributeSet::get(F_copy->getContext(), af->getArgNo()+1, ABfc); - F->removeAttributes(af->getArgNo()+1,ASf.getParamAttributes(af->getArgNo()+1)); - F->addAttributes(af->getArgNo()+1, argASfc); - } -} - -/* - * Create type of merged function - * - input arguments type - * - struct return type - * Get Attributes from original functions - * Get parameter names from original functions - * Insert an empty function of this type in the module - */ -static Function* createEmptyDFNodeFunction(DFNode* N1, DFNode* N2, Module &M) { - Function* F1 = N1->getFuncPointer(); - Function* F2 = N2->getFuncPointer(); - - errs () << "Constructing argument list\n"; - // Construct argument list - std::vector<Type*> ArgTypes; - createArgTypes(N1, N2, ArgTypes); - - errs () << "Constructing return type\n"; - // Construct return type - StructType* FRetTy = createReturnType(N1, N2); - - FunctionType* FTy = FunctionType::get(FRetTy, ArgTypes, false); - // Create a function with the new type - Function* F = Function::Create(FTy, F1->getLinkage(), - F1->getName()+"_"+F2->getName(), &M); - - errs () << "Copying argument names\n"; - // Copy argument names from original functions - copyArgumentNames(N1, N2, F); - // Copy argument attributes from original functions - copyAttrList(N1, N2, F); - - return F; -} - -/* - * Create function of leaf node after merging - * - create type - * - Create the call instructions - * - Create intermediate assignments - * - Create assignments to output struct - */ -static Function* createLeafDFNodeFunction(DFNode* N1, DFNode* N2, Module &M, - unsigned numOfN1AllocArgs, unsigned posOfN1AllocArgs, - unsigned numOfN2AllocArgs) { - - errs () << "Creating function signature\n"; - /* - * Create empty node function of the correct type - */ - Function* F = createEmptyDFNodeFunction(N1, N2, M); - - // Get return type, needed for building the assignmens to the return struct - StructType* FRetTy = cast<StructType>(F->getReturnType()); - - Function* F1 = N1->getFuncPointer(); - Function* F2 = N2->getFuncPointer(); - - errs () << "Creating function body\n"; - // This maps i: position in F argument list, to new position in F argument - // list (after shifting arguments maybe). Initially, no shift. - std::vector<unsigned> FArgsShiftMap(F->getArgumentList().size()); - for (unsigned i = 0; i < FArgsShiftMap.size(); i++) - FArgsShiftMap[i] = i; - - if (numOfN1AllocArgs) { - // Number of remaining f2 parameters is initial parameter number of f2 - // minus the number of edges between n1 and n2. We can also find this by - // getting the number of parameters of the new function F and subtract the - // number of parameters of F1, since this did not change. - unsigned shiftOfN1AllocArgs = F->getArgumentList().size() - - F1->getArgumentList().size() - - numOfN2AllocArgs; - shiftArgs(F, posOfN1AllocArgs, numOfN1AllocArgs, shiftOfN1AllocArgs, - FArgsShiftMap); - } - - - // Add a basic block to the new, empty function - BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F); - ReturnInst* RI = ReturnInst::Create(M.getContext(), - UndefValue::get(FRetTy), BB); - - - errs () << "Creating function call\n"; - // Get Argument list of new function into a vector (for easier indexing) - std::vector<Value*> FArgs; - for (auto& arg: F->getArgumentList()) { - FArgs.push_back(&arg); - } - - // Create call instruction for first node - std::vector<Value*> Args; - for (unsigned i = 0; i < F1->getArgumentList().size(); i++) { - Args.push_back(FArgs[FArgsShiftMap[i]]); - } - CallInst* CI1 = CallInst::Create(F1, - ArrayRef<Value*>(Args), - "merged."+F1->getName(), - RI); - Args.clear(); - - errs () << "Creating function call for second node\n"; - // Create call instruction for second node - for(unsigned fargNo = 0, i = 0; - i < F2->getArgumentList().size(); i++) { - Value* Arg; - if (N2->getExtendedInDFEdgeAt(i)->getSourceDF() == N1) { - ExtractValueInst *EI = - ExtractValueInst::Create(CI1, - N2->getExtendedInDFEdgeAt(i)->getSourcePosition(), - "", - RI); - Arg = EI; - } else { - Arg = FArgs[FArgsShiftMap[F1->getArgumentList().size() + fargNo++]]; - } - Args.push_back(Arg); - } - - CallInst* CI2 = CallInst::Create(F2, - ArrayRef<Value*>(Args), - "merged."+F2->getName(), - RI); - - errs () << "Creating extract element instructions\n"; - // Create extract element instructions for elements of output struct - std::vector<ExtractValueInst *> ExtractValueInstVec; - - // First, from node n1: exclude those that go to n2 - StructType *F1RetTy = dyn_cast<StructType>(F1->getReturnType()); - for (unsigned i = 0; i < F1RetTy->getNumElements(); i++) { - if (N1->getExtendedOutDFEdgeAt(i)->getDestDF() != N2) { - ExtractValueInst *EI = ExtractValueInst::Create(CI1, i, "", RI); - ExtractValueInstVec.push_back(EI); - } - } - // Then, from node n2 - StructType *F2RetTy = dyn_cast<StructType>(F2->getReturnType()); - for (unsigned i = 0; i < F2RetTy->getNumElements(); i++) { - ExtractValueInst *EI = ExtractValueInst::Create(CI2, i, "", RI); - ExtractValueInstVec.push_back(EI); - } - - errs () << "Creating output struct\n"; - // Create output struct of type FRetTy - assert(FRetTy->getNumElements() == ExtractValueInstVec.size() && - "Size of output struct does not match expected number of EE instructions"); - Value* retVal = UndefValue::get(F->getReturnType()); - - for (unsigned i = 0; i < ExtractValueInstVec.size(); i++) { - InsertValueInst *IVI = - InsertValueInst::Create(retVal, ExtractValueInstVec[i], i, "", RI); - retVal = IVI; - } - ReturnInst* newRI = ReturnInst::Create(M.getContext(), retVal); - ReplaceInstWithInst(RI, newRI); - - // Inline the two calls - InlineFunctionInfo IFI1, IFI2; - InlineFunction(CI1, IFI1, nullptr, false); - InlineFunction(CI2, IFI2, nullptr, false); - - return F; -} - -static Function* createInternalDFNodeFunction(DFNode* N1, DFNode* N1an, - DFNode* N1cn, DFNode* N2, DFNode* N2an, DFNode* N2cn, Function* Fa, - Function* Fc, Module &M, unsigned numOfN1AllocArgs, unsigned posOfN1AllocArgs, - unsigned numOfN2AllocArgs) { - - /* - * Create empty node function of the correct type - */ - Function* F = createEmptyDFNodeFunction(N1, N2, M); - - // Get return type, needed for building the assignmens to the return struct - StructType* FRetTy = cast<StructType>(F->getReturnType()); - -// Function* F1 = N1->getFuncPointer(); -// Function* F2 = N2->getFuncPointer(); - - // Add a basic block to the new, empty function - BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F); - ReturnInst* RI = ReturnInst::Create(M.getContext(), - UndefValue::get(FRetTy), BB); - - // Get Argument list of new function into a vector (for easier indexing) - std::vector<Value*> FArgs; - for (auto& arg: F->getArgumentList()) { - FArgs.push_back(&arg); - } - - // Get pointers to functions inthe original graph -// Function* F1a = (N1an) ? N1an->getFuncPointer() : NULL; -// Function* F2a = (N2an) ? N2an->getFuncPointer() : NULL; -// Function* F1c = N1cn->getFuncPointer(); -// Function* F2c = N2cn->getFuncPointer(); - - // Create the required createNode intrinsics - IntrinsicInst* AllocII = NULL; - if (N1an) - AllocII = createIdenticalCreateNodeWithDifferentFunction(Fa, - N1an->getInstruction()); - else if (N2an) - AllocII = createIdenticalCreateNodeWithDifferentFunction(Fa, - N2an->getInstruction()); - if (AllocII) - AllocII->insertBefore(RI); - - // The position in F (new node function) of the node dimensions parameters is - // the same as it was in n1 internal node function, because n1 is the first - // one to be added to the resulting merged node. - IntrinsicInst* ComputeII = - createNewCreateNodeBasedOn(Fc, N1cn->getInstruction(), F); - ComputeII->insertBefore(RI); - - // Vector to be populated with instructions to be added to internal node - std::vector<IntrinsicInst*> IntrinsicInstructionsToAdd; - std::vector<IntrinsicInst*> IntermediateInstructions; - - createNewInternalNodeIntrinsics(N1, N2, N1an, N1cn, N2an, N2cn, - AllocII, ComputeII, - Fa /* FIXME: Unused */, Fc, - IntrinsicInstructionsToAdd, - IntermediateInstructions); - - // Insert generated intrinsics at new internal function - for (auto& Inst: IntrinsicInstructionsToAdd) { - Inst->insertBefore(RI); - } - - // Insert generated intrinsics at new internal function and erase - for (auto& Inst: IntermediateInstructions) { - Inst->insertBefore(RI); - Inst->eraseFromParent(); - } - - return F; -} - -void createNewInternalNodeIntrinsics(DFNode* N1, - DFNode* N2, - DFNode* N1a, - DFNode* N1c, - DFNode* N2a, - DFNode* N2c, - IntrinsicInst* IInewa, - IntrinsicInst* IInewc, - Function* Fa, //FIXME: Unused - Function* Fc, - std::vector<IntrinsicInst*>& IntrinsicInstructionsToAdd, - std::vector<IntrinsicInst*>& IntermediateInstructions) { - IntrinsicInst* II1a = (N1a) ? N1a->getInstruction() : NULL; - IntrinsicInst* II1c = N1c->getInstruction(); - IntrinsicInst* II2a = (N2a) ? N2a->getInstruction() : NULL; - IntrinsicInst* II2c = N2c->getInstruction(); - - Function* F1a = (N1a) ? N1a->getFuncPointer() : NULL; - Function* F1c = N1c->getFuncPointer(); - Function* F2a = (N2a) ? N1a->getFuncPointer() : NULL; - - unsigned n1aNumOfInputs = 0; - unsigned n1aNumOfOutputs = 0; - unsigned n1aPosOfOutputs = 0; - if (N1a) { - n1aNumOfInputs = F1a->getArgumentList().size(); - n1aNumOfOutputs = cast<StructType>(F1a->getReturnType())->getNumElements(); - n1aPosOfOutputs = N1a->getOutDFEdgeAt(0)->getDestPosition(); - } - unsigned n2aNumOfOutputs = 0; - if (N2a) { - n2aNumOfOutputs = cast<StructType>(F2a->getReturnType())->getNumElements(); - } - - unsigned shiftOfN1AllocOutputs = Fc->getArgumentList().size() - - F1c->getArgumentList().size() - - n2aNumOfOutputs; - - std::map<unsigned, unsigned> N1cInMap; - std::map<unsigned, unsigned> N1cOutMap; - std::map<unsigned, unsigned> N2cInMap; - std::map<unsigned, unsigned> N2cOutMap; - // These maps map the old location of an argument/output (to its function's - // parameter list/out struct) to the new, after edges removed and functions - // merged - - // This accounts for argument shifting, due to allocation node n1 - std::vector<unsigned> FcShiftMap; - - buildInputAndOutputMaps(N1c, N2c, N1cInMap, N1cOutMap, N2cInMap, N2cOutMap); - createShiftMap(Fc, n1aPosOfOutputs, n1aNumOfOutputs, shiftOfN1AllocOutputs, - FcShiftMap); - - - std::map<unsigned, unsigned> N1InDFEdgeMap; - std::map<unsigned, unsigned> N1OutDFEdgeMap; - std::map<unsigned, unsigned> N2InDFEdgeMap; - std::map<unsigned, unsigned> N2OutDFEdgeMap; - buildInAndOutEdgeMaps(N1, N2, N1InDFEdgeMap, N1OutDFEdgeMap, N2InDFEdgeMap, - N2OutDFEdgeMap); - - - // Start with the intrinsics for allocation nodes n1a and n2a - - // TODO: This is only for testing, not needed for functionality - std::map<IntrinsicInst*, IntrinsicInst*> CreateEdgeAndBindMap; - - if (N1a) { // If there is an allocation node for the first node - for (Value::user_iterator i = II1a->user_begin(), ie = II1a->user_end(); - i != ie; ++i) { - Value *v = *i; - Instruction *VI = dyn_cast<Instruction>(v); - IntrinsicInst* II = dyn_cast<IntrinsicInst>(VI); - assert(II && "Use of a node handle outside of a visc intrinsic"); - - switch(II->getIntrinsicID()) { - case Intrinsic::visc_createEdge: - // This is between allocation and compute node of n1. - { - // Change source to new allocation node - IntrinsicInst* IItemp1 = - createIdenticalCreateEdgeWithDifferentNode(II, IInewa, true); - // Do not change source port - // Change destination node to new compute node - IntrinsicInst* IItemp2 = - createIdenticalCreateEdgeWithDifferentNode(IItemp1, IInewc, false); - // Change destination port to new port, after inmap and shift - unsigned dstPos = cast<ConstantInt>(II->getOperand(4))->getZExtValue(); - IntrinsicInst* EI = - createIdenticalCreateEdgeWithDifferentPort(IItemp2, - FcShiftMap[N1cInMap[dstPos]], false); - IntrinsicInstructionsToAdd.push_back(EI); - IntermediateInstructions.push_back(IItemp1); - IntermediateInstructions.push_back(IItemp2); - CreateEdgeAndBindMap[II] = EI; - } - break; - case Intrinsic::visc_bind_input: - // These are the inputs from the parent node. - { - // The destination ports will not change, only the destination will - // be changed to point to the new allocation node - IntrinsicInst* BI = - createIdenticalBindInputWithDifferentNode(II, IInewa); - IntrinsicInstructionsToAdd.push_back(BI); - CreateEdgeAndBindMap[II] = BI; - } - break; - case Intrinsic::visc_bind_output: - assert(false && "Allocation node handle found in visc_bind_output"); - break; - default: - assert(false && "Unknown use of node handle"); - break; - } - } - } - - if (N2a) { // If there is an allocation node fot the second node - for (Value::user_iterator i = II2a->user_begin(), ie = II2a->user_end(); - i != ie; ++i) { - Value *v = *i; - Instruction *VI = dyn_cast<Instruction>(v); - IntrinsicInst* II = dyn_cast<IntrinsicInst>(VI); - assert(II && "Use of a node handle outside of a visc intrinsic"); - - switch(II->getIntrinsicID()) { - case Intrinsic::visc_createEdge: - // This is between allocation and compute node of n2. - { - // Change source to new allocation node - IntrinsicInst* IItemp1 = - createIdenticalCreateEdgeWithDifferentNode(II, IInewa, true); - // Change source port to after all outputs of n1a - unsigned srcPos = cast<ConstantInt>(II->getOperand(3))->getZExtValue(); - IntrinsicInst* IItemp2 = - createIdenticalCreateEdgeWithDifferentPort(IItemp1, - srcPos + n1aNumOfOutputs, true); - // Change destination node to new compute node - IntrinsicInst* IItemp3 = - createIdenticalCreateEdgeWithDifferentNode(IItemp2, IInewc, false); - // Change destination port to new port, after inmap and shift - // Use of FcShiftMap is not required here - allocation outputs of - // n2a will not get shifted, but it is OK to use (1-1 at this point) - unsigned dstPos = cast<ConstantInt>(II->getOperand(4))->getZExtValue(); - IntrinsicInst* EI = - createIdenticalCreateEdgeWithDifferentPort(IItemp3, - FcShiftMap[N2cInMap[dstPos]], false); - IntrinsicInstructionsToAdd.push_back(EI); - IntermediateInstructions.push_back(IItemp1); - IntermediateInstructions.push_back(IItemp2); - IntermediateInstructions.push_back(IItemp3); - CreateEdgeAndBindMap[II] = EI; - } - break; - case Intrinsic::visc_bind_input: - // These are the inputs from the parent node. - { - // Change destination node to new allocation node - IntrinsicInst* IItemp1 = - createIdenticalBindInputWithDifferentNode(II, IInewa); - // Change source port to new port, after edgeinmap - unsigned srcPos = cast<ConstantInt>(II->getOperand(1))->getZExtValue(); - IntrinsicInst* IItemp2 = - createIdenticalBindInputWithDifferentPort(IItemp1, - N2InDFEdgeMap[srcPos], true); - // Change destination port to new port, after inmap and shift - unsigned dstPos = cast<ConstantInt>(II->getOperand(2))->getZExtValue(); - IntrinsicInst* BI = - createIdenticalBindInputWithDifferentPort(IItemp2, - dstPos + n1aNumOfInputs, false); - IntrinsicInstructionsToAdd.push_back(BI); - IntermediateInstructions.push_back(IItemp1); - IntermediateInstructions.push_back(IItemp2); - CreateEdgeAndBindMap[II] = BI; - } - break; - case Intrinsic::visc_bind_output: - assert(false && "Allocation node handle found in visc_bind_output"); - break; - default: - assert(false && "Unknown use of node handle"); - break; - } - } - } - - // Continue with the intrinsics for compute nodes n1c and n2c - - for (Value::user_iterator i = II1c->user_begin(), ie = II1c->user_end(); - i != ie; ++i) { // Handle inputs and outputs of n1 compute node - Value *v = *i; - Instruction *VI = dyn_cast<Instruction>(v); - IntrinsicInst* II = dyn_cast<IntrinsicInst>(VI); - assert(II && "Use of a node handle outside of a visc intrinsic"); - - switch(II->getIntrinsicID()) { - case Intrinsic::visc_createEdge: - // This is between allocation and compute node of n1. - { - // These edges should have been handled when dealing with the - // allocation nodes - assert(CreateEdgeAndBindMap.find(II) != CreateEdgeAndBindMap.end() && - "Edge between A-C node should have been handled while processing A"); - } - break; - case Intrinsic::visc_bind_input: - // These are the inputs from the parent node. - { - // The destination ports will not change, only the destination will - // be changed to point to the new compute node - IntrinsicInst* BI = - createIdenticalBindInputWithDifferentNode(II, IInewc); - IntrinsicInstructionsToAdd.push_back(BI); - CreateEdgeAndBindMap[II] = BI; - } - break; - case Intrinsic::visc_bind_output: - // These are the outputs to the parent node. - { - // If this goes to n2, ignore edge completely - unsigned srcPos = cast<ConstantInt>(II->getOperand(1))->getZExtValue(); - if (N1c->getExtendedOutDFEdgeAt(srcPos)->getDestDF() != N2c) { - // this bind creates an edge that ends up to another node in the graph - // Change source to new compute node - IntrinsicInst* IItemp1 = - createIdenticalBindOutputWithDifferentNode(II, IInewc); - // Change source port to new port after outmap - IntrinsicInst* IItemp2 = - createIdenticalBindOutputWithDifferentPort(IItemp1, - N1cOutMap[srcPos], true); - // Change destination port to new port after edgeoutmap - unsigned dstPos = cast<ConstantInt>(II->getOperand(2))->getZExtValue(); - IntrinsicInst* BI = - createIdenticalBindOutputWithDifferentPort(IItemp2, - N1OutDFEdgeMap[dstPos], false); - IntrinsicInstructionsToAdd.push_back(BI); - IntermediateInstructions.push_back(IItemp1); - IntermediateInstructions.push_back(IItemp2); - CreateEdgeAndBindMap[II] = BI; - } - } - break; - default: - errs() << "Unknown use: " << *II << "\n"; - assert(false && "Unknown use of node handle"); - break; - } - } - - for (Value::user_iterator i = II2c->user_begin(), ie = II2c->user_end(); - i != ie; ++i) { // Handle inputs and outputs of n2 compute node - Value *v = *i; - Instruction *VI = dyn_cast<Instruction>(v); - IntrinsicInst* II = dyn_cast<IntrinsicInst>(VI); - assert(II && "Use of a node handle outside of a visc intrinsic"); - - switch(II->getIntrinsicID()) { - case Intrinsic::visc_createEdge: - // This is between allocation and compute node of n2. - { - // These edges should have been handled when dealing with the - // allocation nodes - assert(CreateEdgeAndBindMap.find(II) != CreateEdgeAndBindMap.end() && - "Edge between A-C node should have been handled while processing A"); - } - break; - case Intrinsic::visc_bind_input: - // These are the inputs from the parent node. - { - // If this is incoming from n1 compute node, ignore completely - unsigned dstPos = cast<ConstantInt>(II->getOperand(2))->getZExtValue(); - if (N2c->getExtendedInDFEdgeAt(dstPos)->getSourceDF() != N1c) { - // this bind creates an edge that comes from another node in the graph - // Change destination to new compute node - IntrinsicInst* IItemp1 = - createIdenticalBindInputWithDifferentNode(II, IInewc); - // Change source port to new port after edgeinmap - unsigned srcPos = cast<ConstantInt>(II->getOperand(1))->getZExtValue(); - IntrinsicInst* IItemp2 = - createIdenticalBindInputWithDifferentPort(IItemp1, - N2InDFEdgeMap[srcPos], true); - // Change destination port to new port after inmap and shift - unsigned dstPos = cast<ConstantInt>(II->getOperand(2))->getZExtValue(); - IntrinsicInst* BI = - createIdenticalBindInputWithDifferentPort(IItemp2, - FcShiftMap[N2cInMap[dstPos]], false); - IntrinsicInstructionsToAdd.push_back(BI); - IntermediateInstructions.push_back(IItemp1); - IntermediateInstructions.push_back(IItemp2); - CreateEdgeAndBindMap[II] = BI; - } - } - break; - case Intrinsic::visc_bind_output: - // These are the outputs to the parent node. - { - // this bind creates an edge that ends up to another node in the graph - // Change source to new compute node - IntrinsicInst* IItemp1 = - createIdenticalBindOutputWithDifferentNode(II, IInewc); - // Change source port to new port after outmap - unsigned srcPos = cast<ConstantInt>(II->getOperand(1))->getZExtValue(); - IntrinsicInst* IItemp2 = - createIdenticalBindOutputWithDifferentPort(IItemp1, - N2cOutMap[srcPos], true); - // Change destination port to new port after edgeoutmap - unsigned dstPos = cast<ConstantInt>(II->getOperand(2))->getZExtValue(); - IntrinsicInst* BI = - createIdenticalBindOutputWithDifferentPort(IItemp2, - N2OutDFEdgeMap[dstPos], false); - IntrinsicInstructionsToAdd.push_back(BI); - IntermediateInstructions.push_back(IItemp1); - IntermediateInstructions.push_back(IItemp2); - CreateEdgeAndBindMap[II] = BI; - } - break; - default: - assert(false && "Unknown use of node handle"); - break; - } - } - -} - -void deleteInternalNodeFunction(DFNode* N, BuildDFG &DFG) { - - if (dyn_cast<DFLeafNode>(N)) - return; - - for (inst_iterator i = inst_begin(N->getFuncPointer()), - e = inst_end(N->getFuncPointer()); i != e ; ++i) { - Instruction* I = &*i; // Grab pointer to Instruction - if(IntrinsicInst* II = dyn_cast<IntrinsicInst>(I)) { - switch(II->getIntrinsicID()) { - case Intrinsic::visc_createNode: - case Intrinsic::visc_createNode1D: - case Intrinsic::visc_createNode2D: - case Intrinsic::visc_createNode3D: - // ---------------------------------------------------------------- // - // Updating the BuildDFG result - // remove the node from mapping - DFG.removeElementFromHandleToDFNodeMap(II); - // ---------------------------------------------------------------- // - break; - case Intrinsic::visc_createEdge: - case Intrinsic::visc_bind_input: - case Intrinsic::visc_bind_output: - // ---------------------------------------------------------------- // - // Updating the BuildDFG result - // remove the edge from mapping - DFG.removeElementFromHandleToDFEdgeMap(II); - // ---------------------------------------------------------------- // - break; - default: - errs() << "Error: Invalid VISC Intrinsic inside Internal node!\n\t" << *II << "\n"; - break; - } - } - } - - // Erase Functions associated with node N - Function* F = N->getFuncPointer(); - -errs() << "Removing " << F->getName() << "\n"; - F->replaceAllUsesWith(UndefValue::get(F->getType())); - F->eraseFromParent(); - -} - -/* -void shiftAttrsToLeftBy(Function* F, unsigned shift, unsigned argNo) { - // Source attr location : i+shift (+1), dst : i (+1) - for (unsigned i = argno; i + shift < F->getArgumentList().size(); i++) { - AttributeSet AS = F->getAttributes(); - AttrBuilder AB(AS, i+shift+1); - AttributeSet argAS = AttributeSet::get(F->getContext(), i+1, AB); - F->removeAttributes(i+1,AS.getParamAttributes(i+1)); - F->addAttributes(i+1, argAS); - } - -} - -void shiftArgumentNamesToLeftBy(Function* F, unsigned shift, unsigned argNo) { - // Source attr location : i+shift (+1), dst : i (+1) - - // Skip arguments up until argNo - Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(), as = F->arg_begin(); - for ( ; (ai != ae) && (ai->getArgNo() < argNo); ++ai, ++as) { } - - // Find source of name - for ( unsigned i = 0; (i < shift) && (as != ae); i++) { - ++as; - } - - for ( ; (ai != ae) && (as != ae); ++ai, ++as) { - ai->setName(as->getName()); - } - -} - -void removeFunctionArgument(Function* F, Argument *ArgToRemove) { - - // Shift attributes one to the left - shiftAttrsToLeftBy(F, 1, ArgToRemove->getArgNo()); - // Shift argument names one to the left - shiftArgumentNamesToLeftBy(F, 1, ArgToRemove->getArgNo()); - // Update the type of F - std::vector<Type*> ArgTypes; - for(auto& arg: F->getArgumentList()) { - DEBUG(errs() << arg << "\n"); - if (&arg != ArgToRemove) - ArgTypes.push_back(arg.getType()); - } - FunctionType* FTy = FunctionType::get(F->getReturnType(), ArgTypes, F->isVarArg()); - PointerType* PTy = FTy->getPointerTo(); - F->mutateType(PTy); - -} -*/ - -Argument* getFunctionArgumentAt(Function* F, unsigned i) { - assert((i < F->getArgumentList().size()) && "Requesting argument in invalid position"); - for (auto& arg: F->getArgumentList()) { - if (arg.getArgNo() == i) - return &arg; - } - return NULL; -} - -// TODO -void removeUnnecessaryInputEdges(DFNode* N, DFNode* N1, - unsigned numOfN1AllocArgs, - unsigned numOfN2AllocArgs) { - Function* F = N->getFuncPointer(); - Function* F1 = N1->getFuncPointer(); - // Compute these once - they may change while in the loop - unsigned f1ArgListSize = F1->getArgumentList().size(); - unsigned fArgListSize = F->getArgumentList().size(); - // Iterate over input parameters of F1 without allocation arguments - for (unsigned i = 0; i < f1ArgListSize - numOfN1AllocArgs; i++) { - DFEdge* N1InEdge = N->getInDFEdgeAt(i); - unsigned n1SrcPos = N1InEdge->getSourcePosition(); - for (unsigned j = f1ArgListSize - numOfN1AllocArgs, - pos = f1ArgListSize - numOfN1AllocArgs; - j < fArgListSize - numOfN2AllocArgs; j++, pos++) { - DFEdge* N2InEdge = N->getInDFEdgeAt(pos); - unsigned n2SrcPos = N2InEdge->getSourcePosition(); - Argument* n1arg = getFunctionArgumentAt(F, i); - Argument* n2arg = getFunctionArgumentAt(F, j); - DEBUG(errs() << "Comparing " << *n1arg << " with " << *n2arg << "\n"); - // If the edges are coming from the same position of the same source node - // If the arguments are not pointer arguments, or if they are pointer - // arguments without the out attribute (they are only used as inputs) - if ((N1InEdge->getSourceDF() == N2InEdge->getSourceDF()) && - (n1SrcPos == n2SrcPos) && - ((!(n1arg->getType()->isPointerTy()) && - !(n2arg->getType()->isPointerTy())) || - (!(hasAttribute(F, i, Attribute::Out)) && - !(hasAttribute(F, pos, Attribute::Out))) ) ) { - DEBUG(errs() << "Replacing " << *n1arg << " with " << *n2arg << "\n"); - // It is safe to remove the second argument and replace its uses with - // the first one - n2arg->replaceAllUsesWith(n1arg); -// removeFunctionArgument(F, n2arg); TODO -// removeInputEdgeAt(F, pos); TODO - } else { - // It is not safe to remove the second argument. Update position -// pos++; TODO increase here instead of loop increment - } - } - } -} - -// This function checks the metadata in visc code for a function's target hint -static visc::Target getPreferredTarget(Function* F) { - DEBUG(errs() << "Finding preferred target for " << F->getName() << "\n"); - Module* M = F->getParent(); - // checking for GPU hint - NamedMDNode* HintNode = M->getOrInsertNamedMetadata("visc_hint_gpu"); - for(unsigned i = 0; i < HintNode->getNumOperands(); i++) { - MDNode* N = HintNode->getOperand(i); - Value* FHint = dyn_cast<ValueAsMetadata>(N->getOperand(0).get())->getValue(); - if(F == FHint) - return visc::GPU_TARGET; - } - - // checking for SPIR hint - HintNode = M->getOrInsertNamedMetadata("visc_hint_spir"); - for(unsigned i = 0; i < HintNode->getNumOperands(); i++) { - MDNode* N = HintNode->getOperand(i); - Value* FHint = dyn_cast<ValueAsMetadata>(N->getOperand(0).get())->getValue(); - if(F == FHint) - return visc::SPIR_TARGET; - } - - // checking for CPU hint - HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu"); - for(unsigned i = 0; i < HintNode->getNumOperands(); i++) { - MDNode* N = HintNode->getOperand(i); - Value* FHint = dyn_cast<ValueAsMetadata>(N->getOperand(0).get())->getValue(); - if(F == FHint) - return visc::CPU_TARGET; - } - return visc::None; -} - -// This function adds the hint as metadata in visc code -static void addHint(Function* F, visc::Target T) { - // Get Module - Module* M = F->getParent(); - DEBUG(errs() << "Set preferred target for " << F->getName() << ": " << T << "\n"); - - // Based on the hint, get the hint metadata - NamedMDNode* HintNode; - switch (T) { - case visc::GPU_TARGET: - HintNode = M->getOrInsertNamedMetadata("visc_hint_gpu"); - break; - case visc::SPIR_TARGET: - HintNode = M->getOrInsertNamedMetadata("visc_hint_spir"); - break; - case visc::CPU_TARGET: - HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu"); - break; - default: - llvm_unreachable("Unsupported Target Hint!"); - break; - } - - // Create a node for the function and add it to the hint node - MDNode* N = MDNode::get(M->getContext(), ArrayRef<Metadata*>(ValueAsMetadata::get(F))); - HintNode->addOperand(N); -} - -// This function removes the hint as metadata in visc code -static void removeHint(Function* F, visc::Target T) { - // Get Module - Module* M = F->getParent(); - DEBUG(errs() << "Remove preferred target for " << F->getName() << ": " << T << "\n"); - - // Based on the hint, get the hint metadata - NamedMDNode* HintNode; - switch (T) { - case visc::GPU_TARGET: - HintNode = M->getOrInsertNamedMetadata("visc_hint_gpu"); - break; - case visc::SPIR_TARGET: - HintNode = M->getOrInsertNamedMetadata("visc_hint_spir"); - break; - case visc::CPU_TARGET: - HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu"); - break; - default: - llvm_unreachable("Unsupported Target Hint!"); - break; - } - - // Gather metadata nodes, and keep those not associated with this function - MDNode* N = MDNode::get(M->getContext(), ArrayRef<Metadata*>(ValueAsMetadata::get(F))); - std::vector<MDNode*> MDNodes; - - for (unsigned i = 0; i < HintNode->getNumOperands(); i++) { - MDNode* MDN = HintNode->getOperand(i); - if (MDN == N) { - continue; - } - MDNodes.push_back(MDN); - } - - HintNode->dropAllReferences(); - - for (unsigned i = 0; i < MDNodes.size(); i++) { - HintNode->addOperand(MDNodes[i]); - } - -} - -std::string getTestModuleName(Module &M) { - std::string mid = M.getModuleIdentifier(); - return mid.append(".original.ll"); -} - -} // End of namespace mergedfn - -char MergeDFN::ID = 0; -static RegisterPass<MergeDFN> X("mergedfn", - "Dataflow node merging optimization", - true /* modifies the CFG */, - true /* transformation, * - * not just analysis */); - diff --git a/lib/MergeDFN/MergeDFN.exports b/lib/MergeDFN/MergeDFN.exports deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000