diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index f42f93f6c1d6d376e22bbe715bafa76b820f00f7..edf4a80b11e6dd14744c4d2f5c13d34fafee588b 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -20,8 +20,8 @@
 namespace llvm {
 
 enum AddressSpace {
-  ADDRESS_SPACE_GENERIC = 1,
-  ADDRESS_SPACE_GLOBAL = 0,
+  ADDRESS_SPACE_GENERIC = 0,
+  ADDRESS_SPACE_GLOBAL = 1,
   ADDRESS_SPACE_SHARED = 3,
   ADDRESS_SPACE_CONST = 4,
   ADDRESS_SPACE_LOCAL = 5,
diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
index 942b79f6861921e7aee6a637b5b6ed3b70948322..c614d2e4ce1190827b7278e98624ec1888638971 100644
--- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
@@ -904,7 +904,6 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) {
   
   transformFunctionToVoid(F_nvptx);
 
-
   //FIXME: For now, assume only one allocation node
   kernel->AllocationNode = NULL;
 
@@ -940,11 +939,28 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) {
   }
   std::sort(SharedMemArgs.begin(), SharedMemArgs.end());
 
+  // All pointer args which are not shared memory pointers have to be moved to
+  // global address space
+  unsigned argIndex = 0;
+  std::vector<unsigned> GlobalMemArgs;
+  for(auto& Arg: F_nvptx->getArgumentList()) {
+    if (Arg.getType()->isPointerTy()) {
+      // If the argument is already chosen for the shared memory argument list, skip.
+      // Else put it in the global memory argument list.
+      if(std::count(SharedMemArgs.begin(), SharedMemArgs.end(), argIndex) == 0) {
+        GlobalMemArgs.push_back(argIndex);
+      }
+    }
+    argIndex++;
+  }
+  std::sort(GlobalMemArgs.begin(), GlobalMemArgs.end());
+
   /* At this point, we assume that chescks for the fact that SharedMemArgs only
      contains pointer arguments to GLOBAL_ADDRSPACE have been performed by the
      analysis pass */
 
   changeArgAddrspace(F_nvptx, SharedMemArgs, SHARED_ADDRSPACE);
+  changeArgAddrspace(F_nvptx, GlobalMemArgs, GLOBAL_ADDRSPACE);
 
 
   // Go through all the instructions
diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc b/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc
index c40ce36c4b128587be061a6262f7986dadd08dbc..0e994d478b5b5526f000068050822468a11eda15 100644
--- a/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc
+++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc
@@ -254,8 +254,8 @@ __attribute__((noinline)) void basicSgemm( char transa, char transb, int m, int
 
   int block_x = TILE_N;
   int block_y = TILE_TB_HEIGHT;
-  int grid_x = m*TILE_N/TILE_M;
-  int grid_y = n*TILE_TB_HEIGHT/TILE_N;
+  int grid_x = m/TILE_M;
+  int grid_y = n/TILE_N;
   // Pack data in struct
   RootIn* args = (RootIn*) malloc(sizeof(RootIn));
   packData(args, 
@@ -289,7 +289,6 @@ int main (int argc, char *argv[]) {
     int matBrow, matBcol;
     std::vector<float> matA, matBT;
 
-
     /* Read command line. Expect 3 inputs: A, B and B^T
        in column-major layout*/
     params = pb_ReadParameters(&argc, argv);