diff --git a/llvm/include/llvm/IR/IntrinsicsVISC.td b/llvm/include/llvm/IR/IntrinsicsVISC.td index e40bec31faa8ecfa21f8024fa67917d72c774691..ed848f99b5b4f18967fe779afe249657c95dad1c 100644 --- a/llvm/include/llvm/IR/IntrinsicsVISC.td +++ b/llvm/include/llvm/IR/IntrinsicsVISC.td @@ -18,12 +18,16 @@ let TargetPrefix = "visc" in { */ /* Launch intrinsic - - * i8* llvm.visc.launch(i8*, i8*, int); + * i32 llvm.visc.launch(graphID*, function* , ArgList*); */ - def int_visc_launch : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty, - llvm_i32_ty], []>; + def int_visc_launch : Intrinsic<[llvm_i32_ty], [llvm_ptrptr_ty, llvm_ptr_ty, + llvm_ptr_ty], []>; + + /* Wait intrinsic - + * i32 llvm.visc.wait(graphID); + */ + def int_visc_wait : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], []>; - def int_visc_test : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], []>; /* Create Node intrinsic - * i8* llvm.visc.createNode(function*); */ diff --git a/llvm/lib/Transforms/BuildDFG/BuildDFG.cpp b/llvm/lib/Transforms/BuildDFG/BuildDFG.cpp index 8081f4065eb6b51e2fd61e866f7255510b480617..466bc41e14dee6d7d4ba68aee643495a3ebbb7f2 100644 --- a/llvm/lib/Transforms/BuildDFG/BuildDFG.cpp +++ b/llvm/lib/Transforms/BuildDFG/BuildDFG.cpp @@ -49,7 +49,7 @@ bool BuildDFG::runOnModule(Module &M) { // Intrinsic Instruction has been initialized from this point on. 
- Function* F = cast<Function>((II->getOperand(0))->stripPointerCasts()); + Function* F = cast<Function>((II->getOperand(1))->stripPointerCasts()); Root = DFInternalNode::Create(II, F); BuildGraph(Root, F); @@ -313,9 +313,6 @@ void BuildDFG::BuildGraph (DFInternalNode* N, Function *F) { if(IntrinsicInst* II = dyn_cast<IntrinsicInst>(I)) { errs() << "IntrinsicID = " << II->getIntrinsicID() << ": " << II->getCalledFunction()->getName()<<"\n"; switch(II->getIntrinsicID()) { - case Intrinsic::visc_test: - errs() << "Found Test Intrinsic"; - break; case Intrinsic::visc_createNode: case Intrinsic::visc_createNode1D: @@ -335,8 +332,9 @@ void BuildDFG::BuildGraph (DFInternalNode* N, Function *F) { break; //TODO: Reconsider launch within a dataflow graph (recursion?) + case Intrinsic::visc_wait: case Intrinsic::visc_launch: - errs() << "Error: Launch intrinsic used within a dataflow graph\n"; + errs() << "Error: Launch/wait intrinsic used within a dataflow graph\n"; break; default: diff --git a/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp b/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp index d4cfeccad46faa9ca5606ed7ce03ed1a2cdbe5cd..8c111a64b54b024d584e11b9f1793c391d1cfc20 100644 --- a/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp @@ -63,10 +63,6 @@ private: Value* addLoop(Instruction* I, Value* limit, const Twine& indexName = ""); Argument* getArgumentFromEnd(Function* F, unsigned offset); Argument* getArgumentAt(Function* F, unsigned offset); - Constant* getOrInsertPThreadCreate(); - Constant* getOrInsertPThreadJoin(); - Constant* getOrInsertPThreadExit(); - Constant* getOrInsertMalloc(); void codeGenLaunch(DFInternalNode* Root); void codeGen(DFInternalNode* N); void codeGen(DFLeafNode* N); @@ -117,6 +113,41 @@ bool DFG2LLVM_X86::runOnModule(Module &M) { return true; } +/* Returns vector of all wait instructions + */ +std::vector<CallInst*>* getWaitList(CallInst* LI) { + Value* GraphIDAddr = 
LI->getArgOperand(0); + std::vector<CallInst*>* WaitList = new std::vector<CallInst*>(); + // It must have been loaded from memory somewhere + GraphIDAddr->use_begin(); + for(Value::use_iterator ui = GraphIDAddr->use_begin(), + ue = GraphIDAddr->use_end(); ui!=ue; ++ui) { + if(LoadInst* LI = dyn_cast<LoadInst>(*ui)) { + DEBUG(errs() << *LI << "\n"); + for(Value::use_iterator i = LI->use_begin(), e = LI->use_end(); i!=e; ++i) { + if(CallInst* waitI = dyn_cast<CallInst>(*i)) { + DEBUG(errs() << *waitI << "\n"); + WaitList->push_back(waitI); + } + } + } + // If graphID memory address is used by another launch, then break + if(CallInst* CI = dyn_cast<CallInst>(*ui)) { + if(LI != CI) { + DEBUG(errs()<< "Warning: Overwriting graph ID in memory -- " << *CI << "\n" << *LI << "\n"); + break; + } + } + // If graphID in memory is overwritten using store, it's an error + if(StoreInst* SI =dyn_cast<StoreInst>(*ui)) { + assert(SI->getPointerOperand() != GraphIDAddr + && "Error: Do not manually write over graphID in memory!"); + } + + } + return WaitList; +} + void CodeGenTraversal::addIdxDimArgs(Function* F) { // Add Index and Dim arguments std::string names[] = {"idx_x", "idx_y", "idx_z", "dim_x", "dim_y", "dim_z"}; @@ -224,6 +255,10 @@ Value* CodeGenTraversal::addLoop(Instruction* I, Value* limit, const Twine& inde } void CodeGenTraversal::codeGenLaunch(DFInternalNode* Root) { + // TODO: Place an assert to check if the constant passed by launch intrinsic + // as the number of arguments to DFG is same as the number of arguments of the + // root of DFG + // Get Launch Instruction IntrinsicInst* LI = Root->getInstruction(); @@ -233,54 +268,42 @@ void CodeGenTraversal::codeGenLaunch(DFInternalNode* Root) { Type* i8Ty = Type::getInt8Ty(LI->getContext()); Type* voidTy = Type::getVoidTy(LI->getContext()); - /* Get or Insert pthread utilities necessary to run DFG as a separate thread - * (1) pthread_create - * (2) pthread_join - * (3) pthread_exit - * Also requires a new struct for 
pthread_attr_t + /* Get or Insert visc runtime utilities necessary to run DFG as a separate thread + * (1) llvm_visc_launch_x86 + * (2) llvm_visc_wait_x86 */ - Type *PThreadTy, *PThreadAttrTy, *PThreadArgTy; + Type *GraphIDTy; std::vector<Type*>Elements; // PThreads use different attribute types for 32-bit and 64-bit machines if(M.getPointerSize() == Module::Pointer64) { - PThreadTy = Type::getInt64Ty(LI->getContext()); - Elements.push_back(ArrayType::get(Type::getInt8Ty(LI->getContext()), 56)); + GraphIDTy = i64Ty; } else { - PThreadTy = Type::getInt32Ty(LI->getContext()); - Elements.push_back(ArrayType::get(Type::getInt8Ty(LI->getContext()), 36)); + GraphIDTy = i32Ty; } - PThreadAttrTy = StructType::create(LI->getContext(), Elements, "union.pthread_attr_t"); - FunctionType* PThreadFuncTy = FunctionType::get(i8Ty->getPointerTo(), - ArrayRef<Type*>(i8Ty->getPointerTo()), - false); - PThreadArgTy = i8Ty->getPointerTo(); - - // Argument types for pthread_create - Type* ArgTypesPTCreate[] = {PThreadTy->getPointerTo(), - PThreadAttrTy->getPointerTo(), - PThreadFuncTy->getPointerTo(), - i8Ty->getPointerTo()}; - // Construct FunctionType of pthread_create call - FunctionType* PThreadCreateTy = FunctionType::get(i32Ty, - ArrayRef<Type*>(ArgTypesPTCreate, 4), - false); - // Argument types for pthread_join - Type* ArgTypesPTJoin[] = {PThreadTy, - i8Ty->getPointerTo()->getPointerTo()}; - // Construct FunctionType for pthread_join call - FunctionType* PThreadJoinTy = FunctionType::get(i32Ty, - ArrayRef<Type*>(ArgTypesPTJoin, 2), - false); - // Construct FunctionType for pthread_exit call - FunctionType* PThreadExitTy = FunctionType::get(voidTy, + FunctionType* AppFuncTy = FunctionType::get(i8Ty->getPointerTo(), ArrayRef<Type*>(i8Ty->getPointerTo()), false); + // Argument types for llvm_visc_launch_x86 + Type* ArgTypesLaunch[] = {i8Ty->getPointerTo()->getPointerTo(), + AppFuncTy->getPointerTo(), + i8Ty->getPointerTo()}; + + // Construct FunctionType of llvm_visc_launch_x86 
call + FunctionType* LaunchFuncTy = FunctionType::get(i32Ty, + ArrayRef<Type*>(ArgTypesLaunch, 3), + false); + + // Construct FunctionType for llvm_visc_wait_x86 call + FunctionType* WaitFuncTy = FunctionType::get(i32Ty, + ArrayRef<Type*>(i8Ty->getPointerTo()), + false); + // Get or insert the global declarations for pthread functions - Constant* PThreadCreate = M.getOrInsertFunction("pthread_create", PThreadCreateTy); - Constant* PThreadJoin = M.getOrInsertFunction("pthread_join", PThreadJoinTy); - Constant* PThreadExit = M.getOrInsertFunction("pthread_exit", PThreadExitTy); + Constant* Launch = M.getOrInsertFunction("llvm_visc_launch_x86", LaunchFuncTy); + Constant* Wait = M.getOrInsertFunction("llvm_visc_wait_x86", WaitFuncTy); + // Construct FunctionType for malloc call FunctionType* MallocTy = FunctionType::get(i8Ty->getPointerTo(), ArrayRef<Type*>(i64Ty), @@ -301,17 +324,17 @@ void CodeGenTraversal::codeGenLaunch(DFInternalNode* Root) { * passed to pthread_exit call. */ // Create Launch Function of type i8*(i8*) which calls the root function - Function* LaunchFunc = Function::Create(PThreadFuncTy, - Root->getFuncPointer()->getLinkage(), - "LaunchDataflowGraph", - &M); + Function* AppFunc = Function::Create(AppFuncTy, + Root->getFuncPointer()->getLinkage(), + "LaunchDataflowGraph", + &M); // Give a name to the argument which is used pass data to this thread - Value* data = LaunchFunc->arg_begin(); + Value* data = AppFunc->arg_begin(); data->setName("data.addr"); // Add a basic block to this empty function and a return null statement to it - BasicBlock *BB = BasicBlock::Create(LaunchFunc->getContext(), "entry", LaunchFunc); - ReturnInst* RI = ReturnInst::Create(LaunchFunc->getContext(), - Constant::getNullValue(LaunchFunc->getReturnType()), + BasicBlock *BB = BasicBlock::Create(AppFunc->getContext(), "entry", AppFunc); + ReturnInst* RI = ReturnInst::Create(AppFunc->getContext(), + Constant::getNullValue(AppFunc->getReturnType()), BB); // Find the X86 function 
generated for Root and Function* RootF_X86 = FMap[Root->getFuncPointer()]; @@ -351,46 +374,38 @@ void CodeGenTraversal::codeGenLaunch(DFInternalNode* Root) { argNum++; data = GEP; } + // Code for returning the output Constant* SizeOf = ConstantExpr::getSizeOf(CI->getType()); - CallInst* OutputAddr = CallInst::Create(Malloc, ArrayRef<Value*>(SizeOf), "output.addr", RI); - CastInst* OutputAddrCast = CastInst::CreatePointerCast(OutputAddr, + CastInst* OutputAddrCast = CastInst::CreatePointerCast(data, CI->getType()->getPointerTo(), CI->getName()+".addr", RI); new StoreInst(CI, OutputAddrCast, RI); - CallInst::Create(PThreadExit, ArrayRef<Value*>(OutputAddr), "", RI); - DEBUG(errs() << "Launch Function:\n"); - DEBUG(errs() << *LaunchFunc << "\n"); + DEBUG(errs() << "Application specific function:\n"); + DEBUG(errs() << *AppFunc << "\n"); // Substitute launch intrinsic main - AllocaInst* AI = new AllocaInst(PThreadTy, "DFG_threadID.addr", LI); - DEBUG(errs() << *AI << "\n"); - Value* PTCreateArgs[] = {AI, - Constant::getNullValue(PThreadCreateTy->getParamType(1)), - LaunchFunc, - LI->getArgOperand(1)}; - CallInst* PTCreateInst = CallInst::Create(PThreadCreate, - ArrayRef<Value*>(PTCreateArgs,4), - "", LI); - - DEBUG(errs() << *PTCreateInst << "\n"); - // Place Join - LoadInst* LoadPThreadID = new LoadInst(AI, "DFG_threadID", LI->getParent()->getTerminator()); - AllocaInst* DFGOut = new AllocaInst(RootF_X86->getReturnType()->getPointerTo(), - LaunchFunc->getName()+".out", - LI->getParent()->getTerminator()); - CastInst* DFGOutCast = CastInst::CreatePointerCast(DFGOut, - i8Ty->getPointerTo()->getPointerTo(), "DFG_return", - LI->getParent()->getTerminator()); - - Value* PTJoinArgs[] = {LoadPThreadID, DFGOutCast}; - CallInst* PTJoinInst = CallInst::Create(PThreadJoin, - ArrayRef<Value*>(PTJoinArgs,2), - "", - LI->getParent()->getTerminator()); - DEBUG(errs() << *PTJoinInst << "\n"); + Value* LaunchInstArgs[] = {LI->getArgOperand(0), + AppFunc, + LI->getArgOperand(2)}; + 
CallInst* LaunchInst = CallInst::Create(Launch, + ArrayRef<Value*>(LaunchInstArgs,3), + "", LI); + //ReplaceInstWithInst(LI, LaunchInst); + + DEBUG(errs() << *LaunchInst << "\n"); + // Replace all wait instructions with x86 specific wait instructions + std::vector<CallInst*>* WaitList = getWaitList(LaunchInst); + for(unsigned i=0; i < WaitList->size(); ++i) { + CallInst* waitI = WaitList->at(i); + CallInst* waitI_X86 = CallInst::Create(Wait, + ArrayRef<Value*>(waitI->getArgOperand(0)), + ""); + ReplaceInstWithInst(waitI, waitI_X86); + DEBUG(errs() << *waitI_X86 << "\n"); + } } diff --git a/llvm/projects/visc-rt/visc-rt.cpp b/llvm/projects/visc-rt/visc-rt.cpp index 7945230f264f1ec6a5ca9fd7fe02d0e3b7fbb42c..9080a31b4a4a39aaea7ef4dffac4f055a24ad48d 100644 --- a/llvm/projects/visc-rt/visc-rt.cpp +++ b/llvm/projects/visc-rt/visc-rt.cpp @@ -15,14 +15,14 @@ typedef struct { } DFNodeContext_PTX; extern "C" -__int32_t llvm_visc_launch_x86(size_t* graphID, void* (*rootFunc)(void*), void* arguments) { +__int32_t llvm_visc_launch_x86(void** graphID, void* (*rootFunc)(void*), void* arguments) { DFNodeContext_X86 *Context = (DFNodeContext_X86 *) malloc(sizeof(DFNodeContext_X86)); + *graphID = Context; return pthread_create(&Context->threadID, NULL, rootFunc, arguments); - *graphID = (size_t) Context; } extern "C" -__int32_t llvm_visc_wait_x86(size_t graphID) { +__int32_t llvm_visc_wait_x86(void* graphID) { DFNodeContext_X86* Context = (DFNodeContext_X86*) graphID; return pthread_join(Context->threadID, NULL); } @@ -87,14 +87,14 @@ static char* LoadProgSource(const char* cFilename, size_t* szFinalLength) } extern "C" -__int32_t llvm_visc_launch_ptx(size_t* graphID, void* (*rootFunc) (void*), void* arguments) { +__int32_t llvm_visc_launch_ptx(void** graphID, void* (*rootFunc) (void*), void* arguments) { // Initialize OpenCL // OpenCL specific variables DFNodeContext_PTX *Context = (DFNodeContext_PTX *) malloc(sizeof(DFNodeContext_PTX)); // Return Context pointer as grpahID; - 
*graphID = (size_t) Context; + *graphID = Context; size_t dataBytes; @@ -117,7 +117,7 @@ __int32_t llvm_visc_launch_ptx(size_t* graphID, void* (*rootFunc) (void*), void* checkErr(errcode, CL_SUCCESS, "Failure to get number of platforms"); // now get all the platform IDs - cl_platform_id platforms[numPlatforms]; + cl_platform_id* platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id)*numPlatforms); errcode = clGetPlatformIDs(numPlatforms, platforms, NULL); checkErr(errcode, CL_SUCCESS, "Failure to get platform IDs"); @@ -242,7 +242,7 @@ __int32_t llvm_visc_launch_ptx(size_t* graphID, void* (*rootFunc) (void*), void* extern "C" -__int32_t llvm_visc_wait_ptx(size_t graphID) { +__int32_t llvm_visc_wait_ptx(void* graphID) { DFNodeContext_PTX *Context = (DFNodeContext_PTX*) graphID; clFinish(Context->clCommandQue); diff --git a/llvm/test/VISC/unitTests/3level.ll b/llvm/test/VISC/unitTests/3level.ll index dbb13c69510888f2a10866e800058a23cb332b50..c884acf32cafdce42a5b8e219c3a1ab32676afae 100644 --- a/llvm/test/VISC/unitTests/3level.ll +++ b/llvm/test/VISC/unitTests/3level.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/3level.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -16,7 +17,10 @@ declare i8* @llvm.visc.createNode(i8*) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: 
nounwind declare i8* @llvm.visc.getNode() #0 @@ -30,15 +34,25 @@ declare void @llvm.visc.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - %in.addr = alloca i32 + %in.addr = alloca { i32, %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - store i32 %conv.i, i32* %in.addr - %args = bitcast i32* %in.addr to i8* - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args, i32 1) + %1 = bitcast { i32, %rtype }* %in.addr to i32* + store i32 %conv.i, i32* %1 + %args = bitcast { i32, %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) + %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1 + %outputstruct = load %rtype* %2 + %output1 = extractvalue %rtype %outputstruct, 0 + %output2 = extractvalue %rtype %outputstruct, 1 + %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output1) #0 + %call3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output2) #0 ret i32 0 } diff --git a/llvm/test/VISC/unitTests/query2D.ll b/llvm/test/VISC/unitTests/query2D.ll index c5d4bd2025b66144780e933f0b6b5c9b5032a5d0..9b2ad72c5abac5dd6488a3d8f0848d262d45f521 100644 --- a/llvm/test/VISC/unitTests/query2D.ll +++ b/llvm/test/VISC/unitTests/query2D.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll 
-lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/query2D.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -22,7 +23,10 @@ declare i8* @llvm.visc.createNode2D(i8*, i32, i32) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind declare i8* @llvm.visc.getNode() #0 @@ -45,15 +49,23 @@ declare void @llvm.visc.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - %in.addr = alloca i32 + %in.addr = alloca { i32, %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - store i32 %conv.i, i32* %in.addr - %args = bitcast i32* %in.addr to i8* - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args, i32 1) + %1 = bitcast { i32, %rtype }* %in.addr to i32* + store i32 %conv.i, i32* %1 + %args = bitcast { i32, %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) + %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1 + %outputstruct = load %rtype* %2 + %output = extractvalue 
%rtype %outputstruct, 0 + %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 ret i32 0 } diff --git a/llvm/test/VISC/unitTests/query3D.ll b/llvm/test/VISC/unitTests/query3D.ll index 9dc2e1d01ca2160265a544dd3e5ec088b5ad5457..bec2cb9ce94cf2be87fa8b2c107a26e7acae43c0 100644 --- a/llvm/test/VISC/unitTests/query3D.ll +++ b/llvm/test/VISC/unitTests/query3D.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/query3D.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -25,7 +26,10 @@ declare i8* @llvm.visc.createNode3D(i8*, i32, i32, i32) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind declare i8* @llvm.visc.getNode() #0 @@ -48,15 +52,23 @@ declare void @llvm.visc.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - %in.addr = alloca i32 + %in.addr = alloca { i32, %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - store i32 %conv.i, i32* %in.addr - %args = bitcast i32* %in.addr to i8* - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args, i32 1) + %1 
= bitcast { i32, %rtype }* %in.addr to i32* + store i32 %conv.i, i32* %1 + %args = bitcast { i32, %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) + %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1 + %outputstruct = load %rtype* %2 + %output = extractvalue %rtype %outputstruct, 0 + %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 ret i32 0 } diff --git a/llvm/test/VISC/unitTests/queryNodeInst.ll b/llvm/test/VISC/unitTests/queryNodeInst.ll index 4418391e44d3dd941837dcc9c3833f5ccbc3f20d..258dff23dd3cd4b20891c04d9f990ae32397b041 100644 --- a/llvm/test/VISC/unitTests/queryNodeInst.ll +++ b/llvm/test/VISC/unitTests/queryNodeInst.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -16,7 +17,10 @@ declare i8* @llvm.visc.createNode(i8*) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind declare i8* @llvm.visc.getNode() #0 @@ -36,15 +40,23 
@@ declare void @llvm.visc.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - %in.addr = alloca i32 + %in.addr = alloca { i32, %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - store i32 %conv.i, i32* %in.addr - %args = bitcast i32* %in.addr to i8* - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args, i32 1) + %1 = bitcast { i32, %rtype }* %in.addr to i32* + store i32 %conv.i, i32* %1 + %args = bitcast { i32, %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) + %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1 + %outputstruct = load %rtype* %2 + %output = extractvalue %rtype %outputstruct, 0 + %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 ret i32 0 } diff --git a/llvm/test/VISC/unitTests/queryNumDim.ll b/llvm/test/VISC/unitTests/queryNumDim.ll index 4418391e44d3dd941837dcc9c3833f5ccbc3f20d..258dff23dd3cd4b20891c04d9f990ae32397b041 100644 --- a/llvm/test/VISC/unitTests/queryNumDim.ll +++ b/llvm/test/VISC/unitTests/queryNumDim.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = 
'/home/psrivas2/current-test/unitTests/twoNode.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -16,7 +17,10 @@ declare i8* @llvm.visc.createNode(i8*) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind declare i8* @llvm.visc.getNode() #0 @@ -36,15 +40,23 @@ declare void @llvm.visc.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - %in.addr = alloca i32 + %in.addr = alloca { i32, %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - store i32 %conv.i, i32* %in.addr - %args = bitcast i32* %in.addr to i8* - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args, i32 1) + %1 = bitcast { i32, %rtype }* %in.addr to i32* + store i32 %conv.i, i32* %1 + %args = bitcast { i32, %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) + %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1 + %outputstruct = load %rtype* %2 + %output = extractvalue %rtype %outputstruct, 0 + %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 ret i32 0 } diff --git 
a/llvm/test/VISC/unitTests/queryNumNodeInst.ll b/llvm/test/VISC/unitTests/queryNumNodeInst.ll index 3432daf5104291508ff01db5fe94ebc6948cf92a..ce6a639215f8ad2602452581d46d719980db36bf 100644 --- a/llvm/test/VISC/unitTests/queryNumNodeInst.ll +++ b/llvm/test/VISC/unitTests/queryNumNodeInst.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -19,7 +20,10 @@ declare i8* @llvm.visc.createNode1D(i8*, i32) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind declare i8* @llvm.visc.getNode() #0 @@ -42,15 +46,23 @@ declare void @llvm.visc.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - %in.addr = alloca i32 + %in.addr = alloca { i32, %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - store i32 %conv.i, i32* %in.addr - %args = bitcast i32* %in.addr to i8* - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args, i32 1) + %1 = bitcast { i32, %rtype }* %in.addr to i32* + store i32 %conv.i, i32* %1 + %args = bitcast { i32, %rtype }* %in.addr to i8* + %graphIDloc = 
alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) + %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1 + %outputstruct = load %rtype* %2 + %output = extractvalue %rtype %outputstruct, 0 + %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 ret i32 0 } diff --git a/llvm/test/VISC/unitTests/singleNode.ll b/llvm/test/VISC/unitTests/singleNode.ll index 162661793beb53186c94f9e5202fdabd2bc3dd53..1d45ff55cb467301dc1ab49b642c221e7933ab5b 100644 --- a/llvm/test/VISC/unitTests/singleNode.ll +++ b/llvm/test/VISC/unitTests/singleNode.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/singleNode.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -16,17 +17,25 @@ declare i8* @llvm.visc.createNode(i8*) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: + %in.addr = alloca { %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 - 
%launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype ()* @Root to i8*), i8* undef, i32 0) %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 + %args = bitcast { %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype ()* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) ret i32 0 } diff --git a/llvm/test/VISC/unitTests/twoNode.ll b/llvm/test/VISC/unitTests/twoNode.ll index 31c131970e18e1fd0e5659d8c4cafc56ad875123..b0626a988ff68ee8aebb79a7edf85fc9bbb40a55 100644 --- a/llvm/test/VISC/unitTests/twoNode.ll +++ b/llvm/test/VISC/unitTests/twoNode.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -16,20 +17,27 @@ declare i8* @llvm.visc.createNode(i8*) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - %in.addr = alloca i32 + %in.addr = alloca { i32, %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, 
!tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - store i32 %conv.i, i32* %in.addr - %args = bitcast i32* %in.addr to i8* - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args, i32 1) + %1 = bitcast { i32, %rtype }* %in.addr to i32* + store i32 %conv.i, i32* %1 + %args = bitcast { i32, %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) ret i32 0 } diff --git a/llvm/test/VISC/unitTests/twoNodeConnect.ll b/llvm/test/VISC/unitTests/twoNodeConnect.ll index b5103faa8efe504963d2e931316653f534427ae0..a005c8fadf11e555a892ab82114b09b6b8d75e39 100644 --- a/llvm/test/VISC/unitTests/twoNodeConnect.ll +++ b/llvm/test/VISC/unitTests/twoNodeConnect.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNodeConnect.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -16,7 +17,10 @@ declare i8* @llvm.visc.createNode(i8*) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind declare i8* 
@llvm.visc.getNode() #0 @@ -30,15 +34,23 @@ declare void @llvm.visc.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - %in.addr = alloca i32 + %in.addr = alloca { i32, %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - store i32 %conv.i, i32* %in.addr - %args = bitcast i32* %in.addr to i8* - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args, i32 1) + %1 = bitcast { i32, %rtype }* %in.addr to i32* + store i32 %conv.i, i32* %1 + %args = bitcast { i32, %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) + %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1 + %outputstruct = load %rtype* %2 + %output = extractvalue %rtype %outputstruct, 0 + %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 ret i32 0 } diff --git a/llvm/test/VISC/unitTests/twoNodeQuery.ll b/llvm/test/VISC/unitTests/twoNodeQuery.ll index ce38982c0379a610030bcb5b85ae2a599a69960d..08c69507be55c425f53aa8d41ea7f68d62e3ae48 100644 --- a/llvm/test/VISC/unitTests/twoNodeQuery.ll +++ b/llvm/test/VISC/unitTests/twoNodeQuery.ll @@ -1,5 +1,6 @@ ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s -; RUN: clang %t.ll -lpthread -o %t.bin +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin ; RUN: %t.bin 
5 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNodeQuery.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -16,7 +17,10 @@ declare i8* @llvm.visc.createNode(i8*) #0 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 ; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*, i32) #0 +declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @llvm.visc.wait(i8*) #0 ; Function Attrs: nounwind declare i8* @llvm.visc.getNode() #0 @@ -36,15 +40,23 @@ declare void @llvm.visc.bind.output(i8*, i32, i32) ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #1 { entry: - %in.addr = alloca i32 + %in.addr = alloca { i32, %rtype } %arrayidx = getelementptr inbounds i8** %argv, i64 1 %0 = load i8** %arrayidx, align 8, !tbaa !0 %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0 %conv.i = trunc i64 %call.i to i32 - store i32 %conv.i, i32* %in.addr - %args = bitcast i32* %in.addr to i8* - %launch = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args, i32 1) + %1 = bitcast { i32, %rtype }* %in.addr to i32* + store i32 %conv.i, i32* %1 + %args = bitcast { i32, %rtype }* %in.addr to i8* + %graphIDloc = alloca i8* + %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0 + %graphID = load i8** %graphIDloc + %wait = call i32 @llvm.visc.wait(i8* %graphID) + %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1 + %outputstruct = load %rtype* %2 + %output = extractvalue %rtype %outputstruct, 0 + %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0 ret i32 0 }