From 21a2ad945bfba7a0db89ec4187c9db92a1bf1a60 Mon Sep 17 00:00:00 2001 From: Prakalp Srivastava <psrivas2@illinois.edu> Date: Thu, 11 Sep 2014 21:37:52 +0000 Subject: [PATCH] (1) VISC runtime added to projects. Working for X86 pthreads (2) Modified BuildDFG and DFG2LLVM_X86 to generate the correct calls to visc runtime (3) Modified the test cases to the new launch wait semantics of visc (4) Added Wait intrinsic to VISC and modified the argument list of launch and wait --- llvm/include/llvm/IR/IntrinsicsVISC.td | 12 +- llvm/lib/Transforms/BuildDFG/BuildDFG.cpp | 8 +- .../Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp | 175 ++++++++++-------- llvm/projects/visc-rt/visc-rt.cpp | 14 +- llvm/test/VISC/unitTests/3level.ll | 26 ++- llvm/test/VISC/unitTests/query2D.ll | 24 ++- llvm/test/VISC/unitTests/query3D.ll | 24 ++- llvm/test/VISC/unitTests/queryNodeInst.ll | 24 ++- llvm/test/VISC/unitTests/queryNumDim.ll | 24 ++- llvm/test/VISC/unitTests/queryNumNodeInst.ll | 24 ++- llvm/test/VISC/unitTests/singleNode.ll | 15 +- llvm/test/VISC/unitTests/twoNode.ll | 20 +- llvm/test/VISC/unitTests/twoNodeConnect.ll | 24 ++- llvm/test/VISC/unitTests/twoNodeQuery.ll | 24 ++- 14 files changed, 285 insertions(+), 153 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsVISC.td b/llvm/include/llvm/IR/IntrinsicsVISC.td index e40bec31fa..ed848f99b5 100644 --- a/llvm/include/llvm/IR/IntrinsicsVISC.td +++ b/llvm/include/llvm/IR/IntrinsicsVISC.td @@ -18,12 +18,16 @@ let TargetPrefix = "visc" in { */ /* Launch intrinsic - - * i8* llvm.visc.launch(i8*, i8*, int); + * i32 llvm.visc.launch(graphID*, function* , ArgList*); */ - def int_visc_launch : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty, - llvm_i32_ty], []>; + def int_visc_launch : Intrinsic<[llvm_i32_ty], [llvm_ptrptr_ty, llvm_ptr_ty, + llvm_ptr_ty], []>; + + /* Wait intrinsic - + * i32 llvm.visc.wait(graphID*, returnVal*); + */ + def int_visc_wait : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], []>; - def int_visc_test : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], []>; /* Create Node intrinsic - * i8* llvm.visc.createNode(function*); */ diff --git a/llvm/lib/Transforms/BuildDFG/BuildDFG.cpp b/llvm/lib/Transforms/BuildDFG/BuildDFG.cpp index 8081f4065e..466bc41e14 100644 --- a/llvm/lib/Transforms/BuildDFG/BuildDFG.cpp +++ b/llvm/lib/Transforms/BuildDFG/BuildDFG.cpp @@ -49,7 +49,7 @@ bool BuildDFG::runOnModule(Module &M) { // Intrinsic Instruction has been initialized from this point on. - Function* F = cast<Function>((II->getOperand(0))->stripPointerCasts()); + Function* F = cast<Function>((II->getOperand(1))->stripPointerCasts()); Root = DFInternalNode::Create(II, F); BuildGraph(Root, F); @@ -313,9 +313,6 @@ void BuildDFG::BuildGraph (DFInternalNode* N, Function *F) { if(IntrinsicInst* II = dyn_cast<IntrinsicInst>(I)) { errs() << "IntrinsicID = " << II->getIntrinsicID() << ": " << II->getCalledFunction()->getName()<<"\n"; switch(II->getIntrinsicID()) { - case Intrinsic::visc_test: - errs() << "Found Test Intrinsic"; - break; case Intrinsic::visc_createNode: case Intrinsic::visc_createNode1D: @@ -335,8 +332,9 @@ void BuildDFG::BuildGraph (DFInternalNode* N, Function *F) { break; //TODO: Reconsider launch within a dataflow graph (recursion?) + case Intrinsic::visc_wait: case Intrinsic::visc_launch: - errs() << "Error: Launch intrinsic used within a dataflow graph\n"; + errs() << "Error: Launch/wait intrinsic used within a dataflow graph\n"; break; default: diff --git a/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp b/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp index d4cfeccad4..8c111a64b5 100644 --- a/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp @@ -63,10 +63,6 @@ private: Value* addLoop(Instruction* I, Value* limit, const Twine& indexName = ""); Argument* getArgumentFromEnd(Function* F, unsigned offset); Argument* getArgumentAt(Function* F, unsigned offset); - Constant* getOrInsertPThreadCreate(); - Constant* getOrInsertPThreadJoin(); - Constant* getOrInsertPThreadExit(); - Constant* getOrInsertMalloc(); void codeGenLaunch(DFInternalNode* Root); void codeGen(DFInternalNode* N); void codeGen(DFLeafNode* N); @@ -117,6 +113,41 @@ bool DFG2LLVM_X86::runOnModule(Module &M) { return true; } +/* Returns vector of all wait instructions + */ +std::vector<CallInst*>* getWaitList(CallInst* LI) { + Value* GraphIDAddr = LI->getArgOperand(0); + std::vector<CallInst*>* WaitList = new std::vector<CallInst*>(); + // It must have been loaded from memory somewhere + GraphIDAddr->use_begin(); + for(Value::use_iterator ui = GraphIDAddr->use_begin(), + ue = GraphIDAddr->use_end(); ui!=ue; ++ui) { + if(LoadInst* LI = dyn_cast<LoadInst>(*ui)) { + DEBUG(errs() << *LI << "\n"); + for(Value::use_iterator i = LI->use_begin(), e = LI->use_end(); i!=e; ++i) { + if(CallInst* waitI = dyn_cast<CallInst>(*i)) { + DEBUG(errs() << *waitI << "\n"); + WaitList->push_back(waitI); + } + } + } + // If graphID memory address is used by another launch, then break + if(CallInst* CI = dyn_cast<CallInst>(*ui)) { + if(LI != CI) { + DEBUG(errs()<< "Warning: Overwriting graph ID in memory -- " << *CI << "\n" << *LI << "\n"); + break; + } + } + // If graphID in memory is overwritten using store, it's an error + if(StoreInst* SI =dyn_cast<StoreInst>(*ui)) { + assert(SI->getPointerOperand() == GraphIDAddr + && "Error: Do not manually write over graphID in memory!"); + } + + } + return WaitList; +} + void CodeGenTraversal::addIdxDimArgs(Function* F) { // Add Index and Dim arguments std::string names[] = {"idx_x", "idx_y", "idx_z", "dim_x", "dim_y", "dim_z"}; @@ -224,6 +255,10 @@ Value* CodeGenTraversal::addLoop(Instruction* I, Value* limit, const Twine& inde } void CodeGenTraversal::codeGenLaunch(DFInternalNode* Root) { + // TODO: Place an assert to check if the constant passed bu launch intrinsic + // as the number of arguments to DFG is same as the number of arguments of the + // root of DFG + // Get Launch Instruction IntrinsicInst* LI = Root->getInstruction(); @@ -233,54 +268,42 @@ void CodeGenTraversal::codeGenLaunch(DFInternalNode* Root) { Type* i8Ty = Type::getInt8Ty(LI->getContext()); Type* voidTy = Type::getVoidTy(LI->getContext()); - /* Get or Insert pthread utilities necessary to run DFG as a separate thread - * (1) pthread_create - * (2) pthread_join - * (3) pthread_exit - * Also requires a new struct for pthread_attr_t + /* Get or Insert visc runtime utilities necessary to run DFG as a separate thread + * (1) llvm_visc_launch_x86 + * (2) llvm_visc_wait_x86 */ - Type *PThreadTy, *PThreadAttrTy, *PThreadArgTy; + Type *GraphIDTy; std::vector<Type*>Elements; // PThreads use different attribute types for 32-bit and 64-bit machines if(M.getPointerSize() == Module::Pointer64) { - PThreadTy = Type::getInt64Ty(LI->getContext()); - Elements.push_back(ArrayType::get(Type::getInt8Ty(LI->getContext()), 56)); + GraphIDTy = i64Ty; } else { - PThreadTy = Type::getInt32Ty(LI->getContext()); - Elements.push_back(ArrayType::get(Type::getInt8Ty(LI->getContext()), 36)); + GraphIDTy = i32Ty; } - PThreadAttrTy = StructType::create(LI->getContext(), Elements, "union.pthread_attr_t"); - FunctionType* PThreadFuncTy = FunctionType::get(i8Ty->getPointerTo(), - ArrayRef<Type*>(i8Ty->getPointerTo()), - false); - PThreadArgTy = i8Ty->getPointerTo(); - - // Argument types for pthread_create - Type* ArgTypesPTCreate[] = {PThreadTy->getPointerTo(), - PThreadAttrTy->getPointerTo(), - PThreadFuncTy->getPointerTo(), - i8Ty->getPointerTo()}; - // Construct FunctionType of pthread_create call - FunctionType* PThreadCreateTy = FunctionType::get(i32Ty, - ArrayRef<Type*>(ArgTypesPTCreate, 4), - false); - // Argument types for pthread_join - Type* ArgTypesPTJoin[] = {PThreadTy, - i8Ty->getPointerTo()->getPointerTo()}; - // Construct FunctionType for pthread_join call - FunctionType* PThreadJoinTy = FunctionType::get(i32Ty, - ArrayRef<Type*>(ArgTypesPTJoin, 2), - false); - // Construct FunctionType for pthread_exit call - FunctionType* PThreadExitTy = FunctionType::get(voidTy, + FunctionType* AppFuncTy = FunctionType::get(i8Ty->getPointerTo(), ArrayRef<Type*>(i8Ty->getPointerTo()), false); + // Argument types for llvm_visc_launch_x86 + Type* ArgTypesLaunch[] = {i8Ty->getPointerTo()->getPointerTo(), + AppFuncTy->getPointerTo(), + i8Ty->getPointerTo()}; + + // Construct FunctionType of llvm_visc_launch_x86 call + FunctionType* LaunchFuncTy = FunctionType::get(i32Ty, + ArrayRef<Type*>(ArgTypesLaunch, 3), + false); + + // Construct FunctionType for llvm_visc_wait_x86 call + FunctionType* WaitFuncTy = FunctionType::get(i32Ty, + ArrayRef<Type*>(i8Ty->getPointerTo()), + false); + // Get or insert the global declarations for pthread functions - Constant* PThreadCreate = M.getOrInsertFunction("pthread_create", PThreadCreateTy); - Constant* PThreadJoin = M.getOrInsertFunction("pthread_join", PThreadJoinTy); - Constant* PThreadExit = M.getOrInsertFunction("pthread_exit", PThreadExitTy); + Constant* Launch = M.getOrInsertFunction("llvm_visc_launch_x86", LaunchFuncTy); + Constant* Wait = M.getOrInsertFunction("llvm_visc_wait_x86", WaitFuncTy); + // Construct FunctionType for malloc call FunctionType* MallocTy = FunctionType::get(i8Ty->getPointerTo(), ArrayRef<Type*>(i64Ty), @@ -301,17 +324,17 @@ void CodeGenTraversal::codeGenLaunch(DFInternalNode* Root) { * passed to pthread_exit call. */ // Create Launch Function of type i8*(i8*) which calls the root function - Function* LaunchFunc = Function::Create(PThreadFuncTy, - Root->getFuncPointer()->getLinkage(), - "LaunchDataflowGraph", - &M); + Function* AppFunc = Function::Create(AppFuncTy, + Root->getFuncPointer()->getLinkage(), + "LaunchDataflowGraph", + &M); // Give a name to the argument which is used pass data to this thread - Value* data = LaunchFunc->arg_begin(); + Value* data = AppFunc->arg_begin(); data->setName("data.addr"); // Add a basic block to this empty function and a return null statement to it - BasicBlock *BB = BasicBlock::Create(LaunchFunc->getContext(), "entry", LaunchFunc); - ReturnInst* RI = ReturnInst::Create(LaunchFunc->getContext(), - Constant::getNullValue(LaunchFunc->getReturnType()), + BasicBlock *BB = BasicBlock::Create(AppFunc->getContext(), "entry", AppFunc); + ReturnInst* RI = ReturnInst::Create(AppFunc->getContext(), + Constant::getNullValue(AppFunc->getReturnType()), BB); // Find the X86 function generated for Root and Function* RootF_X86 = FMap[Root->getFuncPointer()]; @@ -351,46 +374,38 @@ void CodeGenTraversal::codeGenLaunch(DFInternalNode* Root) { argNum++; data = GEP; } + // Code for returning the output Constant* SizeOf = ConstantExpr::getSizeOf(CI->getType()); - CallInst* OutputAddr = CallInst::Create(Malloc, ArrayRef<Value*>(SizeOf), "output.addr", RI); - CastInst* OutputAddrCast = CastInst::CreatePointerCast(OutputAddr, + CastInst* OutputAddrCast = CastInst::CreatePointerCast(data, CI->getType()->getPointerTo(), CI->getName()+".addr", RI); new StoreInst(CI, OutputAddrCast, RI); - CallInst::Create(PThreadExit, ArrayRef<Value*>(OutputAddr), "", RI); - DEBUG(errs() << "Launch Function:\n"); - DEBUG(errs() << *LaunchFunc << "\n"); + DEBUG(errs() << "Application specific function:\n"); + DEBUG(errs() << *AppFunc << "\n"); // Substitute launch intrinsic main - AllocaInst* AI = new AllocaInst(PThreadTy, "DFG_threadID.addr", LI); - DEBUG(errs() << *AI << "\n"); - Value* PTCreateArgs[] = {AI, - Constant::getNullValue(PThreadCreateTy->getParamType(1)), - LaunchFunc, - LI->getArgOperand(1)}; - CallInst* PTCreateInst = CallInst::Create(PThreadCreate, - ArrayRef<Value*>(PTCreateArgs,4), - "", LI); - - DEBUG(errs() << *PTCreateInst << "\n"); - // Place Join - LoadInst* LoadPThreadID = new LoadInst(AI, "DFG_threadID", LI->getParent()->getTerminator()); - AllocaInst* DFGOut = new AllocaInst(RootF_X86->getReturnType()->getPointerTo(), - LaunchFunc->getName()+".out", - LI->getParent()->getTerminator()); - CastInst* DFGOutCast = CastInst::CreatePointerCast(DFGOut, - i8Ty->getPointerTo()->getPointerTo(), "DFG_return", - LI->getParent()->getTerminator()); - - Value* PTJoinArgs[] = {LoadPThreadID, DFGOutCast}; - CallInst* PTJoinInst = CallInst::Create(PThreadJoin, - ArrayRef<Value*>(PTJoinArgs,2), - "", - LI->getParent()->getTerminator()); - DEBUG(errs() << *PTJoinInst << "\n"); + Value* LaunchInstArgs[] = {LI->getArgOperand(0), + AppFunc, + LI->getArgOperand(2)}; + CallInst* LaunchInst = CallInst::Create(Launch, + ArrayRef<Value*>(LaunchInstArgs,3), + "", LI); + //ReplaceInstWithInst(LI, LaunchInst); + + DEBUG(errs() << *LaunchInst << "\n"); + // Replace all wait instructions with x86 specific wait instructions + std::vector<CallInst*>* WaitList = getWaitList(LaunchInst); + for(unsigned i=0; i < WaitList->size(); ++i) { + CallInst* waitI = WaitList->at(i); + CallInst* waitI_X86 = CallInst::Create(Wait, + ArrayRef<Value*>(waitI->getArgOperand(0)), + ""); + ReplaceInstWithInst(waitI, waitI_X86); + DEBUG(errs() << *waitI_X86 << "\n"); + } } diff --git a/llvm/projects/visc-rt/visc-rt.cpp b/llvm/projects/visc-rt/visc-rt.cpp index 7945230f26..9080a31b4a 100644 --- a/llvm/projects/visc-rt/visc-rt.cpp +++ b/llvm/projects/visc-rt/visc-rt.cpp @@ -15,14 +15,14 @@ typedef struct { } DFNodeContext_PTX; extern "C" -__int32_t llvm_visc_launch_x86(size_t* graphID, void* (*rootFunc)(void*), void* arguments) { +__int32_t llvm_visc_launch_x86(void** graphID, void* (*rootFunc)(void*), void* arguments) { DFNodeContext_X86 *Context = (DFNodeContext_X86 *) malloc(sizeof(DFNodeContext_X86)); + *graphID = Context; return pthread_create(&Context->threadID, NULL, rootFunc, arguments); - *graphID = (size_t) Context; } extern "C" -__int32_t llvm_visc_wait_x86(size_t graphID) { +__int32_t llvm_visc_wait_x86(void* graphID) { DFNodeContext_X86* Context = (DFNodeContext_X86*) graphID; return pthread_join(Context->threadID, NULL); } @@ -87,14 +87,14 @@ static char* LoadProgSource(const char* cFilename, size_t* szFinalLength) } extern "C" -__int32_t llvm_visc_launch_ptx(size_t* graphID, void* (*rootFunc) (void*), void* arguments) { +__int32_t llvm_visc_launch_ptx(void** graphID, void* (*rootFunc) (void*), void* arguments) { // Initialize OpenCL // OpenCL specific variables DFNodeContext_PTX *Context = (DFNodeContext_PTX *) malloc(sizeof(DFNodeContext_PTX)); // Return Context pointer as grpahID; - *graphID = (size_t) Context; + *graphID = Context; size_t dataBytes; @@ -117,7 +117,7 @@ __int32_t llvm_visc_launch_ptx(size_t* graphID, void* (*rootFunc) (void*), void* checkErr(errcode, CL_SUCCESS, "Failure to get number of platforms"); // now get all the platform IDs - cl_platform_id platforms[numPlatforms]; + cl_platform_id* platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id)*numPlatforms); errcode = clGetPlatformIDs(numPlatforms, platforms, NULL); checkErr(errcode, CL_SUCCESS, "Failure to get platform IDs"); @@ -242,7 +242,7 @@ __int32_t llvm_visc_launch_ptx(size_t* graphID, void* (*rootFunc) (void*), void* extern "C" -__int32_t llvm_visc_wait_ptx(size_t graphID) { +__int32_t llvm_visc_wait_ptx(void* graphID) { DFNodeContext_PTX *Context = (DFNodeContext_PTX*) graphID; clFinish(Context->clCommandQue); diff --git a/llvm/test/VISC/unitTests/3level.ll b/llvm/test/VISC/unitTests/3level.ll index dbb13c6951..c884acf32c 100644 --- a/llvm/test/VISC/unitTests/3level.ll +++ b/llvm/test/VISC/unitTests/3level.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/3level.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -16,7 +17,10 @@ declare i8* @llvm.visc.createNode(i8*) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind declare i8* @llvm.visc.getNode() #0 @@ -30,15 +34,25 @@ declare void @llvm.visc.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - %in.addr = alloca i32 + %in.addr = alloca { i32, %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - store i32 %conv.i, i32* %in.addr - %args = bitcast i32* %in.addr to i8* - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args, i32 1) + %1 = bitcast { i32, %rtype }* %in.addr to i32* + store i32 %conv.i, i32* %1 + %args = bitcast { i32, %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) + %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1 + %outputstruct = load %rtype* %2 + %output1 = extractvalue %rtype %outputstruct, 0 + %output2 = extractvalue %rtype %outputstruct, 1 + %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output1) #0 + %call3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output2) #0 ret i32 0 } diff --git a/llvm/test/VISC/unitTests/query2D.ll b/llvm/test/VISC/unitTests/query2D.ll index c5d4bd2025..9b2ad72c5a 100644 --- a/llvm/test/VISC/unitTests/query2D.ll +++ b/llvm/test/VISC/unitTests/query2D.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/query2D.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -22,7 +23,10 @@ declare i8* @llvm.visc.createNode2D(i8*, i32, i32) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind declare i8* @llvm.visc.getNode() #0 @@ -45,15 +49,23 @@ declare void @llvm.visc.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - %in.addr = alloca i32 + %in.addr = alloca { i32, %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - store i32 %conv.i, i32* %in.addr - %args = bitcast i32* %in.addr to i8* - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args, i32 1) + %1 = bitcast { i32, %rtype }* %in.addr to i32* + store i32 %conv.i, i32* %1 + %args = bitcast { i32, %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) + %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1 + %outputstruct = load %rtype* %2 + %output = extractvalue %rtype %outputstruct, 0 + %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 ret i32 0 } diff --git a/llvm/test/VISC/unitTests/query3D.ll b/llvm/test/VISC/unitTests/query3D.ll index 9dc2e1d01c..bec2cb9ce9 100644 --- a/llvm/test/VISC/unitTests/query3D.ll +++ b/llvm/test/VISC/unitTests/query3D.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/query3D.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -25,7 +26,10 @@ declare i8* @llvm.visc.createNode3D(i8*, i32, i32, i32) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind declare i8* @llvm.visc.getNode() #0 @@ -48,15 +52,23 @@ declare void @llvm.visc.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - %in.addr = alloca i32 + %in.addr = alloca { i32, %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - store i32 %conv.i, i32* %in.addr - %args = bitcast i32* %in.addr to i8* - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args, i32 1) + %1 = bitcast { i32, %rtype }* %in.addr to i32* + store i32 %conv.i, i32* %1 + %args = bitcast { i32, %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) + %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1 + %outputstruct = load %rtype* %2 + %output = extractvalue %rtype %outputstruct, 0 + %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 ret i32 0 } diff --git a/llvm/test/VISC/unitTests/queryNodeInst.ll b/llvm/test/VISC/unitTests/queryNodeInst.ll index 4418391e44..258dff23dd 100644 --- a/llvm/test/VISC/unitTests/queryNodeInst.ll +++ b/llvm/test/VISC/unitTests/queryNodeInst.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -16,7 +17,10 @@ declare i8* @llvm.visc.createNode(i8*) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind declare i8* @llvm.visc.getNode() #0 @@ -36,15 +40,23 @@ declare void @llvm.visc.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - %in.addr = alloca i32 + %in.addr = alloca { i32, %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - store i32 %conv.i, i32* %in.addr - %args = bitcast i32* %in.addr to i8* - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args, i32 1) + %1 = bitcast { i32, %rtype }* %in.addr to i32* + store i32 %conv.i, i32* %1 + %args = bitcast { i32, %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) + %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1 + %outputstruct = load %rtype* %2 + %output = extractvalue %rtype %outputstruct, 0 + %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 ret i32 0 } diff --git a/llvm/test/VISC/unitTests/queryNumDim.ll b/llvm/test/VISC/unitTests/queryNumDim.ll index 4418391e44..258dff23dd 100644 --- a/llvm/test/VISC/unitTests/queryNumDim.ll +++ b/llvm/test/VISC/unitTests/queryNumDim.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -16,7 +17,10 @@ declare i8* @llvm.visc.createNode(i8*) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind declare i8* @llvm.visc.getNode() #0 @@ -36,15 +40,23 @@ declare void @llvm.visc.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - %in.addr = alloca i32 + %in.addr = alloca { i32, %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - store i32 %conv.i, i32* %in.addr - %args = bitcast i32* %in.addr to i8* - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args, i32 1) + %1 = bitcast { i32, %rtype }* %in.addr to i32* + store i32 %conv.i, i32* %1 + %args = bitcast { i32, %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) + %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1 + %outputstruct = load %rtype* %2 + %output = extractvalue %rtype %outputstruct, 0 + %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 ret i32 0 } diff --git a/llvm/test/VISC/unitTests/queryNumNodeInst.ll b/llvm/test/VISC/unitTests/queryNumNodeInst.ll index 3432daf510..ce6a639215 100644 --- a/llvm/test/VISC/unitTests/queryNumNodeInst.ll +++ b/llvm/test/VISC/unitTests/queryNumNodeInst.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -19,7 +20,10 @@ declare i8* @llvm.visc.createNode1D(i8*, i32) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind declare i8* @llvm.visc.getNode() #0 @@ -42,15 +46,23 @@ declare void @llvm.visc.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - %in.addr = alloca i32 + %in.addr = alloca { i32, %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - store i32 %conv.i, i32* %in.addr - %args = bitcast i32* %in.addr to i8* - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args, i32 1) + %1 = bitcast { i32, %rtype }* %in.addr to i32* + store i32 %conv.i, i32* %1 + %args = bitcast { i32, %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) + %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1 + %outputstruct = load %rtype* %2 + %output = extractvalue %rtype %outputstruct, 0 + %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 ret i32 0 } diff --git a/llvm/test/VISC/unitTests/singleNode.ll b/llvm/test/VISC/unitTests/singleNode.ll index 162661793b..1d45ff55cb 100644 --- a/llvm/test/VISC/unitTests/singleNode.ll +++ b/llvm/test/VISC/unitTests/singleNode.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/singleNode.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -16,17 +17,25 @@ declare i8* @llvm.visc.createNode(i8*) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: + %in.addr = alloca { %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype ()* @Root to i8*), i8* undef, i32 0) %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 + %args = bitcast { %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype ()* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) ret i32 0 } diff --git a/llvm/test/VISC/unitTests/twoNode.ll b/llvm/test/VISC/unitTests/twoNode.ll index 31c131970e..b0626a988f 100644 --- a/llvm/test/VISC/unitTests/twoNode.ll +++ b/llvm/test/VISC/unitTests/twoNode.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -16,20 +17,27 @@ declare i8* @llvm.visc.createNode(i8*) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - %in.addr = alloca i32 + %in.addr = alloca { i32, %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - store i32 %conv.i, i32* %in.addr - %args = bitcast i32* %in.addr to i8* - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args, i32 1) + %1 = bitcast { i32, %rtype }* %in.addr to i32* + store i32 %conv.i, i32* %1 + %args = bitcast { i32, %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) ret i32 0 } diff --git a/llvm/test/VISC/unitTests/twoNodeConnect.ll b/llvm/test/VISC/unitTests/twoNodeConnect.ll index b5103faa8e..a005c8fadf 100644 --- a/llvm/test/VISC/unitTests/twoNodeConnect.ll +++ b/llvm/test/VISC/unitTests/twoNodeConnect.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNodeConnect.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -16,7 +17,10 @@ declare i8* @llvm.visc.createNode(i8*) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind declare i8* @llvm.visc.getNode() #0 @@ -30,15 +34,23 @@ declare void @llvm.visc.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - %in.addr = alloca i32 + %in.addr = alloca { i32, %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - store i32 %conv.i, i32* %in.addr - %args = bitcast i32* %in.addr to i8* - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args, i32 1) + %1 = bitcast { i32, %rtype }* %in.addr to i32* + store i32 %conv.i, i32* %1 + %args = bitcast { i32, %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) + %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1 + %outputstruct = load %rtype* %2 + %output = extractvalue %rtype %outputstruct, 0 + %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 ret i32 0 } diff --git a/llvm/test/VISC/unitTests/twoNodeQuery.ll b/llvm/test/VISC/unitTests/twoNodeQuery.ll index ce38982c03..08c69507be 100644 --- a/llvm/test/VISC/unitTests/twoNodeQuery.ll +++ b/llvm/test/VISC/unitTests/twoNodeQuery.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNodeQuery.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -16,7 +17,10 @@ declare i8* @llvm.visc.createNode(i8*) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind declare i8* @llvm.visc.getNode() #0 @@ -36,15 +40,23 @@ declare void @llvm.visc.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - %in.addr = alloca i32 + %in.addr = alloca { i32, %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - store i32 %conv.i, i32* %in.addr - %args = bitcast i32* %in.addr to i8* - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args, i32 1) + %1 = bitcast { i32, %rtype }* %in.addr to i32* + store i32 %conv.i, i32* %1 + %args = bitcast { i32, %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) + %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1 + %outputstruct = load %rtype* %2 + %output = extractvalue %rtype %outputstruct, 0 + %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 ret i32 0 } -- GitLab