From 96fcb4fa0585aa32f5b6d8ac2818cb3042a45085 Mon Sep 17 00:00:00 2001
From: Prakalp Srivastava <psrivas2@illinois.edu>
Date: Fri, 5 Dec 2014 05:49:08 +0000
Subject: [PATCH] Fix visc_gemm_ptx failure. It was caused by a problem in the
 runtime and by the PTX pass generating multiplications of the local and
 global dim limits even for a 1-level node, where the local dim is null.
 M llvm/projects/visc-rt/visc-rt.cpp
 M llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp

---
 .../lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp | 12 +++++++++---
 llvm/projects/visc-rt/visc-rt.cpp                    |  2 +-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
index 869ca5d592..47b7e18856 100644
--- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
@@ -1337,12 +1337,18 @@ static void getExecuteNodeParams(Value* &workDim, Value* &LocalWGPtr, Value*
     if(isa<Argument>(kernel->globalWGSize[i]))
       kernel->globalWGSize[i] = VMap[kernel->globalWGSize[i]];
   }
+
   // For OpenCL, global work group size is the total bumber of instances in each
   // dimension. So, multiply local and global dim limits.
   std::vector<Value*> globalWGSizeInsts;
-  for (unsigned i = 0; i < kernel->gridDim; i++) {
-    BinaryOperator* MulInst = BinaryOperator::Create(Instruction::Mul, kernel->globalWGSize[i], kernel->localWGSize[i], "", IB);
-    globalWGSizeInsts.push_back(MulInst);
+  if(kernel->hasLocalWG()) {
+    for (unsigned i = 0; i < kernel->gridDim; i++) {
+      BinaryOperator* MulInst = BinaryOperator::Create(Instruction::Mul, kernel->globalWGSize[i], kernel->localWGSize[i], "", IB);
+      globalWGSizeInsts.push_back(MulInst);
+    }
+  }
+  else {
+    globalWGSizeInsts = kernel->globalWGSize;
   }
   GlobalWGPtr = genWorkGroupPtr(globalWGSizeInsts, VMap, IB, "GlobalWGSize");
   DEBUG(errs() << "Pointer to global work group: " << *GlobalWGPtr << "\n");
diff --git a/llvm/projects/visc-rt/visc-rt.cpp b/llvm/projects/visc-rt/visc-rt.cpp
index 7186008fe8..c18e2e21cf 100644
--- a/llvm/projects/visc-rt/visc-rt.cpp
+++ b/llvm/projects/visc-rt/visc-rt.cpp
@@ -182,7 +182,7 @@ void* llvm_visc_ptx_executeNode(void* graphID, unsigned workDim , const size_t*
   }
 
   cl_int errcode = clEnqueueNDRangeKernel(Context->clCommandQue,
-      Context->clKernel, workDim, NULL, GlobalWG, LocalWG, 0, NULL, NULL);
+      Context->clKernel, workDim, NULL, GlobalWG, (localWorkSize == NULL)? NULL :  LocalWG, 0, NULL, NULL);
   DEBUG(cout << "Enqueued kernel\n");
   checkErr(errcode, CL_SUCCESS, "Failure to enqueue kernel");
   return event;
-- 
GitLab