From c500ad881a0cf6296e749c485cdfe96e8d517694 Mon Sep 17 00:00:00 2001 From: Prakalp Srivastava <psrivas2@illinois.edu> Date: Thu, 16 Oct 2014 07:34:42 +0000 Subject: [PATCH] More robust implementation of reading multiple device pointers and scalars --- .../DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp | 31 ++++++++++++++++--- llvm/projects/visc-rt/visc-rt.cpp | 17 ---------- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp index a37d9c1525..48d65ec3b4 100644 --- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp @@ -409,17 +409,40 @@ namespace { ArrayRef<Value*>(GraphID), "", RI); - // Read Output + // Read Output Struct Value* GetOutputArgs[] = {GraphID, d_Output, outputSize}; CallInst* h_Output = CallInst::Create(llvm_visc_ptx_getOutput, ArrayRef<Value*>(GetOutputArgs, 3), - "h_output."+CF->getName(), + "h_output."+CF->getName()+".addr", RI); - // Prepare output + // Read each device pointer listed in output struct + // Load the output struct CastInst* BI = BitCastInst::CreatePointerCast(h_Output, CF->getReturnType(), "output.ptr", RI); - LoadInst* KernelOutput = new LoadInst(BI, "output."+CF->getName(), RI); + Value* KernelOutput = new LoadInst(BI, "", RI); + for(unsigned i=0; i < OutputTy->getNumElements(); i++) { + Type* elemTy = OutputTy->getElementType(i); + if(elemTy->isPointerTy()) { + // Pointer type + assert(OutputTy->getElementType(i+1)->isIntegerTy() + && "Every Pointer type must be followed by an integer"); + ExtractValueInst* d_ptr = ExtractValueInst::Create(KernelOutput, ArrayRef<unsigned>(i), "", RI); + ExtractValueInst* len = ExtractValueInst::Create(KernelOutput, ArrayRef<unsigned>(i+1), "", RI); + // GetOutputPtr call + Value* GetOutputArgs[] = {GraphID, + d_ptr, + len}; + CallInst* h_ptr = CallInst::Create(llvm_visc_ptx_getOutput, + ArrayRef<Value*>(GetOutputArgs, 3), + "", + RI); + KernelOutput = InsertValueInst::Create(KernelOutput, h_ptr, ArrayRef<unsigned>(i), "", RI); + + } + } + // Prepare output + KernelOutput->setName("output."+CF->getName()); OutputMap[C] = KernelOutput; DEBUG(errs() << "*** Generating epilogue code for the function****\n"); diff --git a/llvm/projects/visc-rt/visc-rt.cpp b/llvm/projects/visc-rt/visc-rt.cpp index d7e3cd530c..0a5eab8a87 100644 --- a/llvm/projects/visc-rt/visc-rt.cpp +++ b/llvm/projects/visc-rt/visc-rt.cpp @@ -15,11 +15,6 @@ typedef struct { cl_kernel clKernel; } DFNodeContext_PTX; -typedef struct { - cl_mem d_elem; - size_t size; -} OutputTy; - cl_context globalGPUContext; static inline void checkErr(cl_int err, cl_int success, const char * name) { @@ -123,18 +118,6 @@ void* llvm_visc_ptx_getOutput(void* graphID, void* d_output, size_t size) { cl_int errcode = clEnqueueReadBuffer(Context->clCommandQue, (cl_mem)d_output, CL_TRUE, 0, size, h_output, 0, NULL, NULL); checkErr(errcode, CL_SUCCESS, "Failure to read output"); - // Assuming all output is in the format of device pointer followed by size of - // output size format - OutputTy* Output = (OutputTy*) h_output; - unsigned numElems = size/sizeof(OutputTy); - for(unsigned i = 0; i < numElems; i++) { - OutputTy& outputElem = Output[i]; - void* h_outputElem = malloc(outputElem.size); - errcode = clEnqueueReadBuffer(Context->clCommandQue, outputElem.d_elem, CL_TRUE, 0, - outputElem.size, h_outputElem, 0, NULL, NULL); - checkErr(errcode, CL_SUCCESS, "Failure to read output"); - Output[i].d_elem = (cl_mem) h_outputElem; - } return h_output; } -- GitLab