diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp index a37d9c152504c9f4271daceff81ff5be83ec292c..48d65ec3b4113146ac4e79be681d4e1fc06c221a 100644 --- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp @@ -409,17 +409,40 @@ namespace { ArrayRef<Value*>(GraphID), "", RI); - // Read Output + // Read Output Struct Value* GetOutputArgs[] = {GraphID, d_Output, outputSize}; CallInst* h_Output = CallInst::Create(llvm_visc_ptx_getOutput, ArrayRef<Value*>(GetOutputArgs, 3), - "h_output."+CF->getName(), + "h_output."+CF->getName()+".addr", RI); - // Prepare output + // Read each device pointer listed in output struct + // Load the output struct CastInst* BI = BitCastInst::CreatePointerCast(h_Output, CF->getReturnType(), "output.ptr", RI); - LoadInst* KernelOutput = new LoadInst(BI, "output."+CF->getName(), RI); + Value* KernelOutput = new LoadInst(BI, "", RI); + for(unsigned i=0; i < OutputTy->getNumElements(); i++) { + Type* elemTy = OutputTy->getElementType(i); + if(elemTy->isPointerTy()) { + // Pointer type + assert(OutputTy->getElementType(i+1)->isIntegerTy() + && "Every Pointer type must be followed by an integer"); + ExtractValueInst* d_ptr = ExtractValueInst::Create(KernelOutput, ArrayRef<unsigned>(i), "", RI); + ExtractValueInst* len = ExtractValueInst::Create(KernelOutput, ArrayRef<unsigned>(i+1), "", RI); + // GetOutputPtr call + Value* GetOutputArgs[] = {GraphID, + d_ptr, + len}; + CallInst* h_ptr = CallInst::Create(llvm_visc_ptx_getOutput, + ArrayRef<Value*>(GetOutputArgs, 3), + "", + RI); + KernelOutput = InsertValueInst::Create(KernelOutput, h_ptr, ArrayRef<unsigned>(i), "", RI); + + } + } + // Prepare output + KernelOutput->setName("output."+CF->getName()); OutputMap[C] = KernelOutput; DEBUG(errs() << "*** Generating epilogue code for the function****\n"); diff --git a/llvm/projects/visc-rt/visc-rt.cpp b/llvm/projects/visc-rt/visc-rt.cpp index d7e3cd530ccd032057a1fa3602562103883fa24f..0a5eab8a87d548747c0ceb9352c044b63e91e1b3 100644 --- a/llvm/projects/visc-rt/visc-rt.cpp +++ b/llvm/projects/visc-rt/visc-rt.cpp @@ -15,11 +15,6 @@ typedef struct { cl_kernel clKernel; } DFNodeContext_PTX; -typedef struct { - cl_mem d_elem; - size_t size; -} OutputTy; - cl_context globalGPUContext; static inline void checkErr(cl_int err, cl_int success, const char * name) { @@ -123,18 +118,6 @@ void* llvm_visc_ptx_getOutput(void* graphID, void* d_output, size_t size) { cl_int errcode = clEnqueueReadBuffer(Context->clCommandQue, (cl_mem)d_output, CL_TRUE, 0, size, h_output, 0, NULL, NULL); checkErr(errcode, CL_SUCCESS, "Failure to read output"); - // Assuming all output is in the format of device pointer followed by size of - // output size format - OutputTy* Output = (OutputTy*) h_output; - unsigned numElems = size/sizeof(OutputTy); - for(unsigned i = 0; i < numElems; i++) { - OutputTy& outputElem = Output[i]; - void* h_outputElem = malloc(outputElem.size); - errcode = clEnqueueReadBuffer(Context->clCommandQue, outputElem.d_elem, CL_TRUE, 0, - outputElem.size, h_outputElem, 0, NULL, NULL); - checkErr(errcode, CL_SUCCESS, "Failure to read output"); - Output[i].d_elem = (cl_mem) h_outputElem; - } return h_output; }