From 75b7dfd0d4189a162b359c1ff3182fbd9a1b31a6 Mon Sep 17 00:00:00 2001
From: Prakalp Srivastava <psrivas2@illinois.edu>
Date: Tue, 8 Mar 2016 01:04:32 -0600
Subject: [PATCH] (1) Generating correct addrspace for global pointers in
 opencl kernels (2) Fixed a bug in sgemm visc_sh version. Wrong grid size was
 used earlier, resulting in the bug

---
 .../Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h  |  4 ++--
 .../DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp          | 18 +++++++++++++++++-
 .../benchmarks/sgemm/src/visc_sh/main.cc       |  5 ++---
 3 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index f42f93f6c1..edf4a80b11 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -20,8 +20,8 @@
 namespace llvm {
 
 enum AddressSpace {
-  ADDRESS_SPACE_GENERIC = 1,
-  ADDRESS_SPACE_GLOBAL = 0,
+  ADDRESS_SPACE_GENERIC = 0,
+  ADDRESS_SPACE_GLOBAL = 1,
   ADDRESS_SPACE_SHARED = 3,
   ADDRESS_SPACE_CONST = 4,
   ADDRESS_SPACE_LOCAL = 5,
diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
index 942b79f686..c614d2e4ce 100644
--- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
@@ -904,7 +904,6 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) {
   
   transformFunctionToVoid(F_nvptx);
 
-
   //FIXME: For now, assume only one allocation node
   kernel->AllocationNode = NULL;
 
@@ -940,11 +939,28 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) {
   }
   std::sort(SharedMemArgs.begin(), SharedMemArgs.end());
 
+  // All pointer args which are not shared memory pointers have to be moved to
+  // global address space
+  unsigned argIndex = 0;
+  std::vector<unsigned> GlobalMemArgs;
+  for(auto& Arg: F_nvptx->getArgumentList()) {
+    if (Arg.getType()->isPointerTy()) {
+      // If the argument is already chosen for shared memory argument list, skip.
+      // Else put it in Global memory argument list
+      if(std::count(SharedMemArgs.begin(), SharedMemArgs.end(), argIndex) == 0) {
+        GlobalMemArgs.push_back(argIndex);
+      }
+    }
+    argIndex++;
+  }
+  std::sort(GlobalMemArgs.begin(), GlobalMemArgs.end());
+
   /* At this point, we assume that chescks for the fact that SharedMemArgs only
      contains pointer arguments to GLOBAL_ADDRSPACE have been performed by the
      analysis pass */
 
   changeArgAddrspace(F_nvptx, SharedMemArgs, SHARED_ADDRSPACE);
+  changeArgAddrspace(F_nvptx, GlobalMemArgs, GLOBAL_ADDRSPACE);
 
 
   // Go through all the instructions
diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc b/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc
index c40ce36c4b..0e994d478b 100644
--- a/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc
+++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc
@@ -254,8 +254,8 @@ __attribute__((noinline)) void basicSgemm( char transa, char transb, int m, int
 
   int block_x = TILE_N;
   int block_y = TILE_TB_HEIGHT;
-  int grid_x = m*TILE_N/TILE_M;
-  int grid_y = n*TILE_TB_HEIGHT/TILE_N;
+  int grid_x = m/TILE_M;
+  int grid_y = n/TILE_N;
   // Pack data in struct
   RootIn* args = (RootIn*) malloc(sizeof(RootIn));
   packData(args, 
@@ -289,7 +289,6 @@ int main (int argc, char *argv[]) {
     int matBrow, matBcol;
     std::vector<float> matA, matBT;
 
-
     /* Read command line. Expect 3 inputs: A, B and B^T
        in column-major layout*/
     params = pb_ReadParameters(&argc, argv);
-- 
GitLab