diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h index f42f93f6c1d6d376e22bbe715bafa76b820f00f7..edf4a80b11e6dd14744c4d2f5c13d34fafee588b 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h @@ -20,8 +20,8 @@ namespace llvm { enum AddressSpace { - ADDRESS_SPACE_GENERIC = 1, - ADDRESS_SPACE_GLOBAL = 0, + ADDRESS_SPACE_GENERIC = 0, + ADDRESS_SPACE_GLOBAL = 1, ADDRESS_SPACE_SHARED = 3, ADDRESS_SPACE_CONST = 4, ADDRESS_SPACE_LOCAL = 5, diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp index 942b79f6861921e7aee6a637b5b6ed3b70948322..c614d2e4ce1190827b7278e98624ec1888638971 100644 --- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp @@ -904,7 +904,6 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { transformFunctionToVoid(F_nvptx); - //FIXME: For now, assume only one allocation node kernel->AllocationNode = NULL; @@ -940,11 +939,28 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { } std::sort(SharedMemArgs.begin(), SharedMemArgs.end()); + // All pointer args which are not shared memory pointers have to be moved to + // global address space + unsigned argIndex = 0; + std::vector<unsigned> GlobalMemArgs; + for(auto& Arg: F_nvptx->getArgumentList()) { + if (Arg.getType()->isPointerTy()) { + // If the argument is already chosen for shared memory argument list, skip. 
+ // Else put it in Global memory argument list + if(std::count(SharedMemArgs.begin(), SharedMemArgs.end(), argIndex) == 0) { + GlobalMemArgs.push_back(argIndex); + } + } + argIndex++; + } + std::sort(GlobalMemArgs.begin(), GlobalMemArgs.end()); + /* At this point, we assume that chescks for the fact that SharedMemArgs only contains pointer arguments to GLOBAL_ADDRSPACE have been performed by the analysis pass */ changeArgAddrspace(F_nvptx, SharedMemArgs, SHARED_ADDRSPACE); + changeArgAddrspace(F_nvptx, GlobalMemArgs, GLOBAL_ADDRSPACE); // Go through all the instructions diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc b/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc index c40ce36c4b128587be061a6262f7986dadd08dbc..0e994d478b5b5526f000068050822468a11eda15 100644 --- a/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc +++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc @@ -254,8 +254,8 @@ __attribute__((noinline)) void basicSgemm( char transa, char transb, int m, int int block_x = TILE_N; int block_y = TILE_TB_HEIGHT; - int grid_x = m*TILE_N/TILE_M; - int grid_y = n*TILE_TB_HEIGHT/TILE_N; + int grid_x = m/TILE_M; + int grid_y = n/TILE_N; // Pack data in struct RootIn* args = (RootIn*) malloc(sizeof(RootIn)); packData(args, @@ -289,7 +289,6 @@ int main (int argc, char *argv[]) { int matBrow, matBcol; std::vector<float> matA, matBT; - /* Read command line. Expect 3 inputs: A, B and B^T in column-major layout*/ params = pb_ReadParameters(&argc, argv);