diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc index 6f4c8633fb5753802ee2c66157066fa0c49ff2c0..22d9098646f3ca1e8582a319e93e68e9a58e42d8 100644 --- a/hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc +++ b/hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc @@ -37,14 +37,36 @@ extern char* readFile(const char*); exit(1); \ } -void mysgemmNT( float* A, int lda, float* B, int ldb, float* C, int ldc, int k, float alpha, float beta ) -{ +typedef struct __attribute__((__packed__)) { + float* A; size_t bytes_A; + int lda; + float* B; size_t bytes_B; + int ldb; + float* C; size_t bytes_C; + int ldc; + int k; float alpha; float beta; + size_t dim_X1, dim_Y1, dim_X2, dim_Y2; +} RootIn; + +void mysgemmNT( + float* A, size_t bytes_A, int lda, float* B, size_t bytes_B, int ldb, float* C, size_t bytes_C, + int ldc, int k, float alpha, float beta +) { __visc__hint(visc::DEVICE); __visc__attributes(3, A, B, C, 1, C); - float c = 0.0f; - int m = get_global_id(0); - int n = get_global_id(1); + void* thisNode = __visc__getNode(); + void* parentNode = __visc__getParentNode(thisNode); + int lx = __visc__getNodeInstanceID_x(thisNode); + int ly = __visc__getNodeInstanceID_y(thisNode); + int gx = __visc__getNodeInstanceID_x(parentNode); + int gy = __visc__getNodeInstanceID_y(parentNode); + int gridx = __visc__getNumNodeInstances_x(thisNode); + int gridy = __visc__getNumNodeInstances_y(thisNode); + int m = gx * gridx + lx; + int n = gy * gridy + ly; + + float c = 0.0f; for (int i = 0; i < k; ++i) { float a = A[m + i * lda]; float b = B[n + i * ldb]; @@ -53,8 +75,83 @@ void mysgemmNT( float* A, int lda, float* B, int ldb, float* C, int ldc, int k, C[m+n*ldc] = C[m+n*ldc] * beta + alpha * c; } -__attribute__((noinline)) void basicSgemm( char transa, char transb, int m, int n, int k, float alpha, float* A, size_t bytesA, int lda, float* B, size_t bytesB, int ldb, float beta, float* C, size_t bytesC, int ldc ) -{ +void basicSgemmLvl1( + float* A, size_t bytes_A, int lda, float* B, size_t bytes_B, int ldb, float* C, size_t bytes_C, int ldc, + int k, float alpha, float beta, size_t dim_X1, size_t dim_Y1 +) { + __visc__hint(visc::DEVICE); + __visc__attributes(3, A, B, C, 1, C); + void* sgemm_node = __visc__createNodeND(2, mysgemmNT, (size_t) dim_X1, (size_t) dim_Y1); + __visc__bindIn(sgemm_node, 0, 0, 0); + __visc__bindIn(sgemm_node, 1, 1, 0); + __visc__bindIn(sgemm_node, 2, 2, 0); + __visc__bindIn(sgemm_node, 3, 3, 0); + __visc__bindIn(sgemm_node, 4, 4, 0); + __visc__bindIn(sgemm_node, 5, 5, 0); + __visc__bindIn(sgemm_node, 6, 6, 0); + __visc__bindIn(sgemm_node, 7, 7, 0); + __visc__bindIn(sgemm_node, 8, 8, 0); + __visc__bindIn(sgemm_node, 9, 9, 0); + __visc__bindIn(sgemm_node, 10, 10, 0); + __visc__bindIn(sgemm_node, 11, 11, 0); +} + +void basicSgemmLvl2( + float* A, size_t bytes_A, int lda, float* B, size_t bytes_B, int ldb, float* C, size_t bytes_C, int ldc, + int k, float alpha, float beta, + size_t dim_X1, size_t dim_Y1, size_t dim_X2, size_t dim_Y2 +) { + __visc__hint(visc::CPU_TARGET); + __visc__attributes(3, A, B, C, 1, C); + void* sgemm_node = __visc__createNodeND(2, basicSgemmLvl1, (size_t) dim_X2, (size_t) dim_Y2); + __visc__bindIn(sgemm_node, 0, 0, 0); + __visc__bindIn(sgemm_node, 1, 1, 0); + __visc__bindIn(sgemm_node, 2, 2, 0); + __visc__bindIn(sgemm_node, 3, 3, 0); + __visc__bindIn(sgemm_node, 4, 4, 0); + __visc__bindIn(sgemm_node, 5, 5, 0); + __visc__bindIn(sgemm_node, 6, 6, 0); + __visc__bindIn(sgemm_node, 7, 7, 0); + __visc__bindIn(sgemm_node, 8, 8, 0); + __visc__bindIn(sgemm_node, 9, 9, 0); + __visc__bindIn(sgemm_node, 10, 10, 0); + __visc__bindIn(sgemm_node, 11, 11, 0); + __visc__bindIn(sgemm_node, 12, 12, 0); + __visc__bindIn(sgemm_node, 13, 13, 0); +} + +// A wrapper level used in codegen for some backends +void basicSgemmLvl3( + float* A, size_t bytes_A, int lda, float* B, size_t bytes_B, int ldb, float* C, size_t bytes_C, int ldc, + int k, float alpha, float beta, + size_t dim_X1, size_t dim_Y1, size_t dim_X2, size_t dim_Y2 +) { + __visc__hint(visc::CPU_TARGET); + __visc__attributes(3, A, B, C, 1, C); + void* sgemm_node = __visc__createNodeND(0, basicSgemmLvl2); + __visc__bindIn(sgemm_node, 0, 0, 0); + __visc__bindIn(sgemm_node, 1, 1, 0); + __visc__bindIn(sgemm_node, 2, 2, 0); + __visc__bindIn(sgemm_node, 3, 3, 0); + __visc__bindIn(sgemm_node, 4, 4, 0); + __visc__bindIn(sgemm_node, 5, 5, 0); + __visc__bindIn(sgemm_node, 6, 6, 0); + __visc__bindIn(sgemm_node, 7, 7, 0); + __visc__bindIn(sgemm_node, 8, 8, 0); + __visc__bindIn(sgemm_node, 9, 9, 0); + __visc__bindIn(sgemm_node, 10, 10, 0); + __visc__bindIn(sgemm_node, 11, 11, 0); + __visc__bindIn(sgemm_node, 12, 12, 0); + __visc__bindIn(sgemm_node, 13, 13, 0); + __visc__bindIn(sgemm_node, 14, 14, 0); + __visc__bindIn(sgemm_node, 15, 15, 0); +} + +__attribute__((noinline)) void basicSgemm( + char transa, char transb, int m, int n, int k, float alpha, + float* A, size_t bytesA, int lda, float* B, size_t bytesB, int ldb, float beta, + float* C, size_t bytesC, int ldc +) { if ((transa != 'N') && (transa != 'n')) { std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl; return; @@ -71,10 +168,18 @@ __attribute__((noinline)) void basicSgemm( char transa, char transb, int m, int << "; n should be multiple of " << TILE_SZ << std::endl; } - unsigned long db[2] = {TILE_SZ,TILE_SZ}; - unsigned long dg[2] = {m/TILE_SZ*db[0],n/TILE_SZ*db[1]}; - - void* sgemmDFG = __visc__node(mysgemmNT, 2, 2, db[0], db[1], dg[0]/db[0], dg[1]/db[1], 12, A, bytesA, lda, B, bytesB, ldb, C, bytesC, ldc, k, alpha, beta, 0); + size_t db[2] = {TILE_SZ,TILE_SZ}, dg[2] = {m/TILE_SZ*db[0],n/TILE_SZ*db[1]}; + + void *root_in = malloc(sizeof(RootIn)); + RootIn root_in_local = { + A, bytesA, lda, + B, bytesB, ldb, + C, bytesC, ldc, + k, alpha, beta, + db[0], db[1], dg[0]/db[0], dg[1]/db[1] + }; + *(RootIn *)root_in = root_in_local; + void* sgemmDFG = __visc__launch(0, basicSgemmLvl3, root_in); __visc__wait(sgemmDFG); } @@ -174,3 +279,4 @@ int main (int argc, char *argv[]) { return 0; } +