diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/Makefile deleted file mode 100644 index 2234bf54e1e665f95b38dd0e25c2fe1b5539ce4e..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# (c) 2010 The Board of Trustees of the University of Illinois. - -LANGUAGE=hpvm -SRCDIR_OBJS=io.ll #compute_gold.o -HPVM_OBJS=main.hpvm.ll -APP_CUDALDFLAGS=-lm -lstdc++ -APP_CFLAGS=-ffast-math -O3 -APP_CXXFLAGS=-ffast-math -O3 diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/io.cc deleted file mode 100644 index 04744f404ebaf6e669c2bbe91600519742b57dc9..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/io.cc +++ /dev/null @@ -1,84 +0,0 @@ -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -/* I/O routines for reading and writing matrices in column-major - * layout - */ - -#include <fstream> -#include <iostream> -#include <vector> - -char *readFile(const char *fileName) { - std::fstream f(fileName, std::fstream::in); - if (!f.good()) { - std::cerr << "Error Reading File!!" << std::endl; - return NULL; - } - - f.seekg(0, std::ios::end); - int length = f.tellg(); - f.seekg(0, std::ios::beg); - - char *buffer; - - if (length > 0) { - buffer = new char[length]; - f.read(buffer, length); - buffer[length - 1] = 0; - } else { - buffer = new char; - buffer[0] = 0; - } - - f.close(); - - return buffer; -} - -bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, - std::vector<float> &v) { - std::cerr << "Opening file:" << fn << std::endl; - std::fstream f(fn, std::fstream::in); - if (!f.good()) { - return false; - } - - // Read # of rows and cols - f >> nr_row; - f >> nr_col; - - float data; - std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; - while (f.good()) { - f >> data; - v.push_back(data); - } - v.pop_back(); // remove the duplicated last element - return true; -} - -bool writeColMajorMatrixFile(const char *fn, int nr_row, int nr_col, - std::vector<float> &v) { - std::cerr << "Opening file:" << fn << " for write." << std::endl; - std::fstream f(fn, std::fstream::out); - if (!f.good()) { - return false; - } - - // Read # of rows and cols - f << nr_row << " " << nr_col << " "; - - float data; - std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; - for (int i = 0; i < v.size(); ++i) { - f << v[i] << ' '; - } - f << "\n"; - return true; -} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/kernel.cl b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/kernel.cl deleted file mode 100644 index ae0f5b60f4b800515bd84a04b02926acd625665c..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/kernel.cl +++ /dev/null @@ -1,40 +0,0 @@ -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -/* - * Kernel of dense matrix-matrix multiplication kernel. - */ - -__kernel void mysgemmNT( __global const float *A, int lda, __global const float *B, int ldb, __global float* C, int ldc, int k, float alpha, float beta ) -{ - // Partial results - float c[TILE_N]; - for (int i=0; i < TILE_N; i++) - c[i] = 0.0f; - - int mid = get_local_id(1)*get_local_size(0)+get_local_id(0); - int m = get_group_id(0) * TILE_M + mid; - - int b_base = 0; - - for (int i = 0; i < k; i+=TILE_TB_HEIGHT) { - float a; - b_base = get_group_id(1) * TILE_N + i * ldb; - - for (int j = 0; j < TILE_TB_HEIGHT; j++) { - a = A[m + (i+j)*lda]; - for (int kk = 0; kk < TILE_N; kk++) - c[kk] += a * B[b_base + j * ldb + kk]; - - } - } - int t = ldc * get_group_id(1) * TILE_N + m; - for (int i = 0; i < TILE_N; i++) { - C[t+i*ldc] = C[t+i*ldc] * beta + alpha * c[i]; - } -} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/main.cc deleted file mode 100644 index a1db2e56a5c5639319d7be5f6a890d44c3a28421..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/main.cc +++ /dev/null @@ -1,186 +0,0 @@ -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -/* - * Main entry of dense matrix-matrix multiplication kernel - */ - -#include <hpvm.h> -#include <iostream> -#include <malloc.h> -#include <math.h> -#include <parboil.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/time.h> -#include <vector> - -// I/O routines -extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, - std::vector<float> &v); -extern bool writeColMajorMatrixFile(const char *fn, int, int, - std::vector<float> &); -extern char *readFile(const char *); - -// Parameters of tile sizes -#define TILE_N 16 -#define TILE_TB_HEIGHT 8 -#define TILE_M (TILE_N * TILE_TB_HEIGHT) - -#define CHECK_ERROR(errorMessage) \ - if (clStatus != CL_SUCCESS) { \ - std::cout << errorMessage << " Error!\n"; \ - std::cout << "Line: " << __LINE__ << "\n"; \ - exit(1); \ - } - -void mysgemmNT(float *A, int lda, float *B, int ldb, float *C, int ldc, int k, - float alpha, float beta) { - __hpvm__hint(hpvm::GPU_TARGET); - __hpvm__attributes(3, A, B, C, 1, C); - - float c[TILE_N]; - for (int i = 0; i < TILE_N; i++) - c[i] = 0.0f; - - int mid = get_local_id(1) * get_local_size(0) + get_local_id(0); - int m = get_group_id(0) * TILE_M + mid; - - int b_base = 0; - - for (int i = 0; i < k; i += TILE_TB_HEIGHT) { - float a; - b_base = get_group_id(1) * TILE_N + i * ldb; - - for (int j = 0; j < TILE_TB_HEIGHT; j++) { - a = A[m + (i + j) * lda]; - for (int kk = 0; kk < TILE_N; kk++) - c[kk] += a * B[b_base + j * ldb + kk]; - } - } - int t = ldc * get_group_id(1) * TILE_N + m; - for (int i = 0; i < TILE_N; i++) { - C[t + i * ldc] = C[t + i * ldc] * beta + alpha * c[i]; - } -} - -__attribute__((noinline)) void basicSgemm(char transa, char transb, int m, - int n, int k, float alpha, float *A, - size_t bytesA, int lda, float *B, - size_t bytesB, int ldb, float beta, - float *C, size_t bytesC, int ldc) { - if ((transa != 'N') && (transa != 'n')) { - std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl; - return; - } - - if ((transb != 'T') && (transb != 't')) { - std::cerr << "unsupported value of 'transb' in regtileSgemm()" << std::endl; - return; - } - - // In this code we assume the matrix sizes are multiple of tile size - if ((m % TILE_M) || (n % TILE_N)) { - std::cerr << "unsupported size of matrix. m should be multiple of " - << TILE_M << "; n should be multiple of " << TILE_N << std::endl; - return; - } - - unsigned db[2] = {TILE_N, TILE_TB_HEIGHT}; - // unsigned dg[2] = {m*TILE_N/TILE_M,n*TILE_TB_HEIGHT/TILE_N}; - unsigned dg[2] = {m * db[0] / TILE_M, n * db[1] / TILE_N}; - - unsigned sgemmDFG = __hpvm__node(mysgemmNT, 2, 2, db[0], db[1], dg[0] / db[0], - dg[1] / db[1], 12, A, bytesA, lda, B, bytesB, - ldb, C, bytesC, ldc, k, alpha, beta, 0); - __hpvm__wait(sgemmDFG); -} - -int main(int argc, char *argv[]) { - - struct pb_Parameters *params; - struct pb_TimerSet timers; - - size_t A_sz, B_sz, C_sz; - int matArow, matAcol; - int matBrow, matBcol; - std::vector<float> matA, matBT; - - /* Read command line. Expect 3 inputs: A, B and B^T - in column-major layout*/ - params = pb_ReadParameters(&argc, argv); - if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) || - (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) { - fprintf(stderr, "Expecting three input filenames\n"); - exit(-1); - } - - /* Read in data */ - // load A - readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA); - - // load B^T - readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT); - - pb_InitializeTimerSet(&timers); - __hpvm__init(); - - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - // copy A to device memory - A_sz = matArow * matAcol * sizeof(float); - B_sz = matBrow * matBcol * sizeof(float); - - // allocate space for C - C_sz = matArow * matBcol * sizeof(float); - - // OpenCL memory allocation - std::vector<float> matC(matArow * matBcol); - - llvm_hpvm_track_mem(&matA.front(), A_sz); - llvm_hpvm_track_mem(&matBT.front(), B_sz); - llvm_hpvm_track_mem(&matC.front(), C_sz); - // Copy A and B^T into device memory - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - - for (size_t i = 0; i < matC.size(); i++) - matC[i] = 0.0f; - - pb_SwitchToTimer(&timers, pb_TimerID_NONE); - - // Use standard sgemm interface - basicSgemm('N', 'T', matArow, matBcol, matAcol, 1.0f, &matA.front(), A_sz, - matArow, &matBT.front(), B_sz, matBcol, 0.0f, &matC.front(), C_sz, - matArow); - - pb_SwitchToTimer(&timers, pb_TimerID_COPY); - llvm_hpvm_request_mem(&matC.front(), C_sz); - - pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK); - llvm_hpvm_untrack_mem(&matA.front()); - llvm_hpvm_untrack_mem(&matBT.front()); - llvm_hpvm_untrack_mem(&matC.front()); - pb_SwitchToTimer(&timers, pb_TimerID_NONE); - - pb_PrintTimerSet(&timers); - __hpvm__cleanup(); - - if (params->outFile) { - - /* Write C to file */ - // pb_SwitchToTimer(&timers, pb_TimerID_IO); - writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC); - } - - double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL])); - std::cout << "GFLOPs = " << 2. * matArow * matBcol * matAcol / GPUtime / 1e9 - << std::endl; - pb_FreeParameters(params); - - return 0; -} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/Makefile deleted file mode 100644 index f81bac47072bc017dcdcdccf373cdfbd0f21ceac..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -# (c) 2010 The Board of Trustees of the University of Illinois. - -LANGUAGE=hpvm -SRCDIR_OBJS=io.ll #compute_gold.o -HPVM_OBJS=main.hpvm.ll -APP_CUDALDFLAGS=-lm -lstdc++ -APP_CFLAGS=-ffast-math -O3 -APP_CXXFLAGS=-ffast-math -O3 -APP_OPTFLAGS=-unroll-threshold=300 -loop-unroll -sroa diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/io.cc deleted file mode 100644 index 04744f404ebaf6e669c2bbe91600519742b57dc9..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/io.cc +++ /dev/null @@ -1,84 +0,0 @@ -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -/* I/O routines for reading and writing matrices in column-major - * layout - */ - -#include <fstream> -#include <iostream> -#include <vector> - -char *readFile(const char *fileName) { - std::fstream f(fileName, std::fstream::in); - if (!f.good()) { - std::cerr << "Error Reading File!!" << std::endl; - return NULL; - } - - f.seekg(0, std::ios::end); - int length = f.tellg(); - f.seekg(0, std::ios::beg); - - char *buffer; - - if (length > 0) { - buffer = new char[length]; - f.read(buffer, length); - buffer[length - 1] = 0; - } else { - buffer = new char; - buffer[0] = 0; - } - - f.close(); - - return buffer; -} - -bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, - std::vector<float> &v) { - std::cerr << "Opening file:" << fn << std::endl; - std::fstream f(fn, std::fstream::in); - if (!f.good()) { - return false; - } - - // Read # of rows and cols - f >> nr_row; - f >> nr_col; - - float data; - std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; - while (f.good()) { - f >> data; - v.push_back(data); - } - v.pop_back(); // remove the duplicated last element - return true; -} - -bool writeColMajorMatrixFile(const char *fn, int nr_row, int nr_col, - std::vector<float> &v) { - std::cerr << "Opening file:" << fn << " for write." << std::endl; - std::fstream f(fn, std::fstream::out); - if (!f.good()) { - return false; - } - - // Read # of rows and cols - f << nr_row << " " << nr_col << " "; - - float data; - std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; - for (int i = 0; i < v.size(); ++i) { - f << v[i] << ' '; - } - f << "\n"; - return true; -} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/main.cc deleted file mode 100644 index de0d473ed6fe6724ef81f99b13e02d0de29b103b..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/main.cc +++ /dev/null @@ -1,350 +0,0 @@ -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -/* - * Main entry of dense matrix-matrix multiplication kernel - */ - -#include <hpvm.h> -#include <iostream> -#include <malloc.h> -#include <math.h> -#include <parboil.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/time.h> -#include <vector> - -// I/O routines -extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, - std::vector<float> &v); -extern bool writeColMajorMatrixFile(const char *fn, int, int, - std::vector<float> &); -extern char *readFile(const char *); - -// Parameters of tile sizes -#define TILE_N 16 -#define TILE_TB_HEIGHT 8 -#define TILE_M (TILE_N * TILE_TB_HEIGHT) - -#define CHECK_ERROR(errorMessage) \ - if (clStatus != CL_SUCCESS) { \ - std::cout << errorMessage << " Error!\n"; \ - std::cout << "Line: " << __LINE__ << "\n"; \ - exit(1); \ - } - -typedef struct __attribute__((__packed__)) { - float *A; - size_t bytesA; - int lda; - float *B; - size_t bytesB; - int ldb; - float *C; - size_t bytesC; - int ldc; - int k; - float alpha; - float beta; - long block_x; - long block_y; - long grid_x; - long grid_y; -} RootIn; - -void packData(RootIn *args, float *A, size_t bytesA, int lda, float *B, - size_t bytesB, int ldb, float *C, size_t bytesC, int ldc, int k, - float alpha, float beta, long block_x, long block_y, long grid_x, - long grid_y) { - args->A = A; - args->bytesA = bytesA; - args->lda = lda; - args->B = B; - args->bytesB = bytesB; - args->ldb = ldb; - args->C = C; - args->bytesC = bytesC; - args->ldc = ldc; - args->k = k; - args->alpha = alpha; - args->beta = beta; - args->block_x = block_x; - args->block_y = block_y; - args->grid_x = grid_x; - args->grid_y = grid_y; -} - -void Allocation(long block_x, long block_y) { - void *shB = __hpvm__malloc(block_x * block_y * sizeof(float)); - __hpvm__return(2, shB, block_x * block_y * sizeof(float)); -} - -void SgemmLeaf(float *A, size_t bytesA, int lda, float *B, size_t bytesB, - int ldb, float *C, size_t bytesC, int ldc, int k, float alpha, - float beta, float *shB, size_t bytesshB) { - __hpvm__hint(hpvm::DEVICE); - //__hpvm__hint(hpvm::SPIR_TARGET); - //__hpvm__hint(hpvm::GPU_TARGET); - - __hpvm__attributes(3, A, B, C, 1, C); - - void *thisNode = __hpvm__getNode(); - void *parentNode = __hpvm__getParentNode(thisNode); - - long lx = __hpvm__getNodeInstanceID_x(thisNode); - long ly = __hpvm__getNodeInstanceID_y(thisNode); - - long gx = __hpvm__getNodeInstanceID_x(parentNode); - long gy = __hpvm__getNodeInstanceID_y(parentNode); - - long dimx = __hpvm__getNumNodeInstances_x(thisNode); - - float c[TILE_N]; - for (int i = 0; i < TILE_N; i++) - c[i] = 0.0f; - - int mid = ly * dimx + lx; - int m = gx * TILE_M + mid; - int n = gy * TILE_N + lx; - - for (int i = 0; i < k; i += TILE_TB_HEIGHT) { - float a; - // shB[ly][lx] = B[n+(i+ly)*ldb]; - shB[ly * dimx + lx] = B[n + (i + ly) * ldb]; - - __hpvm__barrier(); - for (int j = 0; j < TILE_TB_HEIGHT; j++) { - a = A[m + (i + j) * lda]; - for (int kk = 0; kk < TILE_N; kk++) { - // c[kk] += a * shB[j][kk]; - c[kk] += a * shB[j * dimx + kk]; - } - } - __hpvm__barrier(); - } - - int t = ldc * gy * TILE_N + m; - for (int i = 0; i < TILE_N; i++) { - C[t + i * ldc] = C[t + i * ldc] * beta + alpha * c[i]; - } -} - -// Work group node for sgemm - Creates allocation node and leaf (work item) node -void SgemmTB(float *A, size_t bytesA, int lda, float *B, size_t bytesB, int ldb, - float *C, size_t bytesC, int ldc, int k, float alpha, float beta, - long block_x, long block_y) { - __hpvm__hint(hpvm::CPU_TARGET); - __hpvm__attributes(3, A, B, C, 1, C); - void *AllocationNode = __hpvm__createNodeND(0, Allocation); - void *SgemmLeafNode = __hpvm__createNodeND(2, SgemmLeaf, block_x, block_y); - - // Bind edges - __hpvm__bindIn(SgemmLeafNode, 0, 0, 0); // Bind A - __hpvm__bindIn(SgemmLeafNode, 1, 1, 0); // Bind bytesA - __hpvm__bindIn(SgemmLeafNode, 2, 2, 0); // Bind lda - __hpvm__bindIn(SgemmLeafNode, 3, 3, 0); // Bind B - __hpvm__bindIn(SgemmLeafNode, 4, 4, 0); // Bind bytesB - __hpvm__bindIn(SgemmLeafNode, 5, 5, 0); // Bind ldb - __hpvm__bindIn(SgemmLeafNode, 6, 6, 0); // Bind C - __hpvm__bindIn(SgemmLeafNode, 7, 7, 0); // Bind bytesC - __hpvm__bindIn(SgemmLeafNode, 8, 8, 0); // Bind ldc - __hpvm__bindIn(SgemmLeafNode, 9, 9, 0); // Bind k - __hpvm__bindIn(SgemmLeafNode, 10, 10, 0); // Bind alpha - __hpvm__bindIn(SgemmLeafNode, 11, 11, 0); // Bind beta - - __hpvm__bindIn(AllocationNode, 12, 0, 0); // Bind block_x - __hpvm__bindIn(AllocationNode, 13, 1, 0); // Bind block_y - - // Create Edges between AllocationNode and BFSLeafNodeNode - __hpvm__edge(AllocationNode, SgemmLeafNode, 1, 0, 12, 0); // Edge local_B - __hpvm__edge(AllocationNode, SgemmLeafNode, 1, 1, 13, - 0); // Edge bytes_local_B -} - -// Root node for sgemm - Creates work group node -void SgemmRoot(float *A, size_t bytesA, int lda, // 0-2 - float *B, size_t bytesB, int ldb, // 3-5 - float *C, size_t bytesC, int ldc, // 6-8 - int k, float alpha, float beta, // 9-11 - long block_x, long block_y, long grid_x, long grid_y // 12-15 -) { - __hpvm__hint(hpvm::CPU_TARGET); - __hpvm__attributes(3, A, B, C, 1, C); - void *SgemmTBNode = __hpvm__createNodeND(2, SgemmTB, grid_x, grid_y); - - // Bind edges - __hpvm__bindIn(SgemmTBNode, 0, 0, 0); // Bind A - __hpvm__bindIn(SgemmTBNode, 1, 1, 0); // Bind bytesA - __hpvm__bindIn(SgemmTBNode, 2, 2, 0); // Bind lda - __hpvm__bindIn(SgemmTBNode, 3, 3, 0); // Bind B - __hpvm__bindIn(SgemmTBNode, 4, 4, 0); // Bind bytesB - __hpvm__bindIn(SgemmTBNode, 5, 5, 0); // Bind ldb - __hpvm__bindIn(SgemmTBNode, 6, 6, 0); // Bind C - __hpvm__bindIn(SgemmTBNode, 7, 7, 0); // Bind bytesC - __hpvm__bindIn(SgemmTBNode, 8, 8, 0); // Bind ldc - __hpvm__bindIn(SgemmTBNode, 9, 9, 0); // Bind k - __hpvm__bindIn(SgemmTBNode, 10, 10, 0); // Bind alpha - __hpvm__bindIn(SgemmTBNode, 11, 11, 0); // Bind beta - __hpvm__bindIn(SgemmTBNode, 12, 12, 0); // Bind block_x - __hpvm__bindIn(SgemmTBNode, 13, 13, 0); // Bind block_y -} - -void SgemmWrapper(float *A, size_t bytesA, int lda, // 0-2 - float *B, size_t bytesB, int ldb, // 3-5 - float *C, size_t bytesC, int ldc, // 6-8 - int k, float alpha, float beta, // 9-11 - long block_x, long block_y, long grid_x, long grid_y // 12-15 -) { - __hpvm__hint(hpvm::CPU_TARGET); - __hpvm__attributes(3, A, B, C, 1, C); - void *SgemmRootNode = __hpvm__createNodeND(0, SgemmRoot); - - // Bind edges - __hpvm__bindIn(SgemmRootNode, 0, 0, 0); // Bind A - __hpvm__bindIn(SgemmRootNode, 1, 1, 0); // Bind bytesA - __hpvm__bindIn(SgemmRootNode, 2, 2, 0); // Bind lda - __hpvm__bindIn(SgemmRootNode, 3, 3, 0); // Bind B - __hpvm__bindIn(SgemmRootNode, 4, 4, 0); // Bind bytesB - __hpvm__bindIn(SgemmRootNode, 5, 5, 0); // Bind ldb - __hpvm__bindIn(SgemmRootNode, 6, 6, 0); // Bind C - __hpvm__bindIn(SgemmRootNode, 7, 7, 0); // Bind bytesC - __hpvm__bindIn(SgemmRootNode, 8, 8, 0); // Bind ldc - __hpvm__bindIn(SgemmRootNode, 9, 9, 0); // Bind k - __hpvm__bindIn(SgemmRootNode, 10, 10, 0); // Bind alpha - __hpvm__bindIn(SgemmRootNode, 11, 11, 0); // Bind beta - __hpvm__bindIn(SgemmRootNode, 12, 12, 0); // Bind block_x - __hpvm__bindIn(SgemmRootNode, 13, 13, 0); // Bind block_y - __hpvm__bindIn(SgemmRootNode, 14, 14, 0); // Bind grid_x - __hpvm__bindIn(SgemmRootNode, 15, 15, 0); // Bind grid_y -} - -// Creates root node for sgemm -__attribute__((noinline)) void basicSgemm(struct pb_TimerSet *timers, - char transa, char transb, int m, - int n, int k, float alpha, float *A, - size_t bytesA, int lda, float *B, - size_t bytesB, int ldb, float beta, - float *C, size_t bytesC, int ldc) { - if ((transa != 'N') && (transa != 'n')) { - std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl; - return; - } - - if ((transb != 'T') && (transb != 't')) { - std::cerr << "unsupported value of 'transb' in regtileSgemm()" << std::endl; - return; - } - - // In this code we assume the matrix sizes are multiple of tile size - if ((m % TILE_M) || (n % TILE_N)) { - std::cerr << "unsupported size of matrix. m should be multiple of " - << TILE_M << "; n should be multiple of " << TILE_N << std::endl; - return; - } - - // unsigned db[2] = {TILE_N,TILE_TB_HEIGHT}; - // unsigned dg[2] = {m*TILE_N/TILE_M,n*TILE_TB_HEIGHT/TILE_N}; - - long block_x = TILE_N; - long block_y = TILE_TB_HEIGHT; - long grid_x = m / TILE_M; - long grid_y = n / TILE_N; - - // Pack data in struct - RootIn *args = (RootIn *)malloc(sizeof(RootIn)); - packData(args, A, bytesA, lda, B, bytesB, ldb, C, bytesC, ldc, k, alpha, beta, - block_x, block_y, grid_x, grid_y); - - pb_SwitchToTimer(timers, hpvm_TimerID_COMPUTATION); - void *sgemmDFG = __hpvm__launch(0, SgemmWrapper, (void *)args); - - __hpvm__wait(sgemmDFG); - pb_SwitchToTimer(timers, pb_TimerID_COMPUTE); -} - -int main(int argc, char *argv[]) { - - struct pb_Parameters *params; - struct pb_TimerSet timers; - - size_t A_sz, B_sz, C_sz; - int matArow, matAcol; - int matBrow, matBcol; - std::vector<float> matA, matBT; - - /* Read command line. Expect 3 inputs: A, B and B^T - in column-major layout*/ - params = pb_ReadParameters(&argc, argv); - if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) || - (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) { - fprintf(stderr, "Expecting three input filenames\n"); - exit(-1); - } - - /* Read in data */ - // load A - readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA); - - // load B^T - readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT); - - pb_InitializeTimerSet(&timers); - __hpvm__init(); - - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - // copy A to device memory - A_sz = matArow * matAcol * sizeof(float); - B_sz = matBrow * matBcol * sizeof(float); - - // allocate space for C - C_sz = matArow * matBcol * sizeof(float); - - // OpenCL memory allocation - std::vector<float> matC(matArow * matBcol); - - llvm_hpvm_track_mem(&matA.front(), A_sz); - llvm_hpvm_track_mem(&matBT.front(), B_sz); - llvm_hpvm_track_mem(&matC.front(), C_sz); - // Copy A and B^T into device memory - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - - for (size_t i = 0; i < matC.size(); i++) - matC[i] = 0.0f; - - // Use standard sgemm interface - basicSgemm(&timers, 'N', 'T', matArow, matBcol, matAcol, 1.0f, &matA.front(), - A_sz, matArow, &matBT.front(), B_sz, matBcol, 0.0f, &matC.front(), - C_sz, matArow); - - pb_SwitchToTimer(&timers, pb_TimerID_COPY); - llvm_hpvm_request_mem(&matC.front(), C_sz); - - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - llvm_hpvm_untrack_mem(&matA.front()); - llvm_hpvm_untrack_mem(&matBT.front()); - llvm_hpvm_untrack_mem(&matC.front()); - pb_SwitchToTimer(&timers, pb_TimerID_NONE); - - pb_PrintTimerSet(&timers); - __hpvm__cleanup(); - - if (params->outFile) { - /* Write C to file */ - // pb_SwitchToTimer(&timers, pb_TimerID_IO); - writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC); - } - - double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL])); - std::cout << "GFLOPs = " << 2. * matArow * matBcol * matAcol / GPUtime / 1e9 - << std::endl; - pb_FreeParameters(params); - - return 0; -} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/Makefile deleted file mode 100644 index 2234bf54e1e665f95b38dd0e25c2fe1b5539ce4e..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# (c) 2010 The Board of Trustees of the University of Illinois. - -LANGUAGE=hpvm -SRCDIR_OBJS=io.ll #compute_gold.o -HPVM_OBJS=main.hpvm.ll -APP_CUDALDFLAGS=-lm -lstdc++ -APP_CFLAGS=-ffast-math -O3 -APP_CXXFLAGS=-ffast-math -O3 diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/io.cc deleted file mode 100644 index 04744f404ebaf6e669c2bbe91600519742b57dc9..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/io.cc +++ /dev/null @@ -1,84 +0,0 @@ -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -/* I/O routines for reading and writing matrices in column-major - * layout - */ - -#include <fstream> -#include <iostream> -#include <vector> - -char *readFile(const char *fileName) { - std::fstream f(fileName, std::fstream::in); - if (!f.good()) { - std::cerr << "Error Reading File!!" << std::endl; - return NULL; - } - - f.seekg(0, std::ios::end); - int length = f.tellg(); - f.seekg(0, std::ios::beg); - - char *buffer; - - if (length > 0) { - buffer = new char[length]; - f.read(buffer, length); - buffer[length - 1] = 0; - } else { - buffer = new char; - buffer[0] = 0; - } - - f.close(); - - return buffer; -} - -bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, - std::vector<float> &v) { - std::cerr << "Opening file:" << fn << std::endl; - std::fstream f(fn, std::fstream::in); - if (!f.good()) { - return false; - } - - // Read # of rows and cols - f >> nr_row; - f >> nr_col; - - float data; - std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; - while (f.good()) { - f >> data; - v.push_back(data); - } - v.pop_back(); // remove the duplicated last element - return true; -} - -bool writeColMajorMatrixFile(const char *fn, int nr_row, int nr_col, - std::vector<float> &v) { - std::cerr << "Opening file:" << fn << " for write." << std::endl; - std::fstream f(fn, std::fstream::out); - if (!f.good()) { - return false; - } - - // Read # of rows and cols - f << nr_row << " " << nr_col << " "; - - float data; - std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; - for (int i = 0; i < v.size(); ++i) { - f << v[i] << ' '; - } - f << "\n"; - return true; -} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/main.cc deleted file mode 100644 index be39d713d55d1cb518083679fb1ea1ce717a4ca9..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/main.cc +++ /dev/null @@ -1,180 +0,0 @@ -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -/* - * Main entry of dense matrix-matrix multiplication kernel - */ - -#include <hpvm.h> -#include <iostream> -#include <malloc.h> -#include <math.h> -#include <parboil.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/time.h> -#include <vector> - -// I/O routines -extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, - std::vector<float> &v); -extern bool writeColMajorMatrixFile(const char *fn, int, int, - std::vector<float> &); -extern char *readFile(const char *); - -// Parameters of tile sizes -#define TILE_SZ 16 - -#define CHECK_ERROR(errorMessage) \ - if (clStatus != CL_SUCCESS) { \ - std::cout << errorMessage << " Error!\n"; \ - std::cout << "Line: " << __LINE__ << "\n"; \ - exit(1); \ - } - -void mysgemmNT(float *A, int lda, float *B, int ldb, float *C, int ldc, int k, - float alpha, float beta) { - __hpvm__attributes(3, A, B, C, 1, C); - float c0, c1, c2, c3; - c0 = c1 = c2 = c3 = 0.0f; - int m = 4 * get_global_id(0); - int n = get_global_id(1); - - for (int i = 0; i < k; ++i) { - float a0 = A[m + i * lda]; - float a1 = A[m + 1 + i * lda]; - float a2 = A[m + 2 + i * lda]; - float a3 = A[m + 3 + i * lda]; - - float b = B[n + i * ldb]; - - c0 += a0 * b; - c1 += a1 * b; - c2 += a2 * b; - c3 += a3 * b; - } - C[m + n * ldc] = C[m + n * ldc] * beta + alpha * c0; - C[m + 1 + n * ldc] = C[m + 1 + n * ldc] * beta + alpha * c1; - C[m + 2 + n * ldc] = C[m + 2 + n * ldc] * beta + alpha * c2; - C[m + 3 + n * ldc] = C[m + 3 + n * ldc] * beta + alpha * c3; -} - -__attribute__((noinline)) void basicSgemm(char transa, char transb, int m, - int n, int k, float alpha, float *A, - size_t bytesA, int lda, float *B, - size_t bytesB, int ldb, float beta, - float *C, size_t bytesC, int ldc) { - if ((transa != 'N') && (transa != 'n')) { - std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl; - return; - } - - if ((transb != 'T') && (transb != 't')) { - std::cerr << "unsupported value of 'transb' in regtileSgemm()" << std::endl; - return; - } - - // In this code we assume the matrix sizes are multiple of tile size - if ((m % TILE_SZ) || (n % TILE_SZ)) { - std::cerr << "unsupported size of matrix. m should be multiple of " - << TILE_SZ << "; n should be multiple of " << TILE_SZ - << std::endl; - } - - unsigned db[2] = {TILE_SZ / 4, TILE_SZ}; - unsigned dg[2] = {m / TILE_SZ * db[0], n / TILE_SZ * db[1]}; - - unsigned sgemmDFG = __hpvm__node(mysgemmNT, 2, 2, db[0], db[1], dg[0] / db[0], - dg[1] / db[1], 12, A, bytesA, lda, B, bytesB, - ldb, C, bytesC, ldc, k, alpha, beta, 0); - __hpvm__wait(sgemmDFG); -} - -int main(int argc, char *argv[]) { - - struct pb_Parameters *params; - struct pb_TimerSet timers; - - size_t A_sz, B_sz, C_sz; - int matArow, matAcol; - int matBrow, matBcol; - std::vector<float> matA, matBT; - - pb_InitializeTimerSet(&timers); - __hpvm__init(); - - /* Read command line. Expect 3 inputs: A, B and B^T - in column-major layout*/ - params = pb_ReadParameters(&argc, argv); - if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) || - (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) { - fprintf(stderr, "Expecting three input filenames\n"); - exit(-1); - } - - /* Read in data */ - pb_SwitchToTimer(&timers, pb_TimerID_IO); - - // load A - readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA); - - // load B^T - readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT); - - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - // copy A to device memory - A_sz = matArow * matAcol * sizeof(float); - B_sz = matBrow * matBcol * sizeof(float); - - // allocate space for C - C_sz = matArow * matBcol * sizeof(float); - - // OpenCL memory allocation - std::vector<float> matC(matArow * matBcol); - - llvm_hpvm_track_mem(&matA.front(), A_sz); - llvm_hpvm_track_mem(&matBT.front(), B_sz); - llvm_hpvm_track_mem(&matC.front(), C_sz); - // Copy A and B^T into device memory - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - - for (size_t i = 0; i < matC.size(); i++) - matC[i] = 0.0f; - - pb_SwitchToTimer(&timers, pb_TimerID_NONE); - - // Use standard sgemm interface - basicSgemm('N', 'T', matArow, matBcol, matAcol, 1.0f, &matA.front(), A_sz, - matArow, &matBT.front(), B_sz, matBcol, 0.0f, &matC.front(), C_sz, - matArow); - - if (params->outFile) { - pb_SwitchToTimer(&timers, pb_TimerID_COPY); - - /* Write C to file */ - llvm_hpvm_request_mem(&matC.front(), C_sz); - pb_SwitchToTimer(&timers, pb_TimerID_IO); - writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC); - } - - pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK); - llvm_hpvm_untrack_mem(&matA.front()); - llvm_hpvm_untrack_mem(&matBT.front()); - llvm_hpvm_untrack_mem(&matC.front()); - pb_SwitchToTimer(&timers, pb_TimerID_NONE); - - double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL])); - std::cout << "GFLOPs = " << 2. * matArow * matBcol * matAcol / GPUtime / 1e9 - << std::endl; - pb_PrintTimerSet(&timers); - __hpvm__cleanup(); - pb_FreeParameters(params); - - return 0; -} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/Makefile deleted file mode 100644 index 2234bf54e1e665f95b38dd0e25c2fe1b5539ce4e..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# (c) 2010 The Board of Trustees of the University of Illinois. - -LANGUAGE=hpvm -SRCDIR_OBJS=io.ll #compute_gold.o -HPVM_OBJS=main.hpvm.ll -APP_CUDALDFLAGS=-lm -lstdc++ -APP_CFLAGS=-ffast-math -O3 -APP_CXXFLAGS=-ffast-math -O3 diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/io.cc deleted file mode 100644 index 04744f404ebaf6e669c2bbe91600519742b57dc9..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/io.cc +++ /dev/null @@ -1,84 +0,0 @@ -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -/* I/O routines for reading and writing matrices in column-major - * layout - */ - -#include <fstream> -#include <iostream> -#include <vector> - -char *readFile(const char *fileName) { - std::fstream f(fileName, std::fstream::in); - if (!f.good()) { - std::cerr << "Error Reading File!!" << std::endl; - return NULL; - } - - f.seekg(0, std::ios::end); - int length = f.tellg(); - f.seekg(0, std::ios::beg); - - char *buffer; - - if (length > 0) { - buffer = new char[length]; - f.read(buffer, length); - buffer[length - 1] = 0; - } else { - buffer = new char; - buffer[0] = 0; - } - - f.close(); - - return buffer; -} - -bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, - std::vector<float> &v) { - std::cerr << "Opening file:" << fn << std::endl; - std::fstream f(fn, std::fstream::in); - if (!f.good()) { - return false; - } - - // Read # of rows and cols - f >> nr_row; - f >> nr_col; - - float data; - std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; - while (f.good()) { - f >> data; - v.push_back(data); - } - v.pop_back(); // remove the duplicated last element - return true; -} - -bool writeColMajorMatrixFile(const char *fn, int nr_row, int nr_col, - std::vector<float> &v) { - std::cerr << "Opening file:" << fn << " for write." << std::endl; - std::fstream f(fn, std::fstream::out); - if (!f.good()) { - return false; - } - - // Read # of rows and cols - f << nr_row << " " << nr_col << " "; - - float data; - std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; - for (int i = 0; i < v.size(); ++i) { - f << v[i] << ' '; - } - f << "\n"; - return true; -} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/main.cc deleted file mode 100644 index be39d713d55d1cb518083679fb1ea1ce717a4ca9..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/main.cc +++ /dev/null @@ -1,180 +0,0 @@ -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -/* - * Main entry of dense matrix-matrix multiplication kernel - */ - -#include <hpvm.h> -#include <iostream> -#include <malloc.h> -#include <math.h> -#include <parboil.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/time.h> -#include <vector> - -// I/O routines -extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, - std::vector<float> &v); -extern bool writeColMajorMatrixFile(const char *fn, int, int, - std::vector<float> &); -extern char *readFile(const char *); - -// Parameters of tile sizes -#define TILE_SZ 16 - -#define CHECK_ERROR(errorMessage) \ - if (clStatus != CL_SUCCESS) { \ - std::cout << errorMessage << " Error!\n"; \ - std::cout << "Line: " << __LINE__ << "\n"; \ - exit(1); \ - } - -void mysgemmNT(float *A, int lda, float *B, int ldb, float *C, int ldc, int k, - float alpha, float beta) { - __hpvm__attributes(3, A, B, C, 1, C); - float c0, c1, c2, c3; - c0 = c1 = c2 = c3 = 0.0f; - int m = 4 * get_global_id(0); - int n = get_global_id(1); - - for (int i = 0; i < k; ++i) { - float a0 = A[m + i * lda]; - float a1 = A[m + 1 + i * lda]; - float a2 = A[m + 2 + i * lda]; - float a3 = A[m + 3 + i * lda]; - - float b = B[n + i * ldb]; - - c0 += a0 * b; - c1 += a1 * b; - c2 += a2 * b; - c3 += a3 * b; - } - C[m + n * ldc] = C[m + n * ldc] * beta + alpha * c0; - C[m + 1 + n * ldc] = C[m + 1 + n * ldc] * beta + alpha * c1; - C[m + 2 + n * ldc] = C[m + 2 + n * ldc] * beta + alpha * c2; - C[m + 3 + n * ldc] = C[m + 3 + n * ldc] * beta + alpha * c3; -} - -__attribute__((noinline)) void basicSgemm(char transa, char transb, int m, - int n, int k, float alpha, float *A, - size_t bytesA, int lda, float *B, - size_t bytesB, int ldb, float beta, - float *C, size_t bytesC, int ldc) { - if ((transa != 'N') && (transa != 'n')) { - std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl; - return; - } - - if ((transb != 'T') && (transb != 't')) { - std::cerr << "unsupported value of 'transb' in regtileSgemm()" << std::endl; - return; - } - - // In this code we assume the matrix sizes are multiple of tile size - if ((m % TILE_SZ) || (n % TILE_SZ)) { - std::cerr << "unsupported size of matrix. m should be multiple of " - << TILE_SZ << "; n should be multiple of " << TILE_SZ - << std::endl; - } - - unsigned db[2] = {TILE_SZ / 4, TILE_SZ}; - unsigned dg[2] = {m / TILE_SZ * db[0], n / TILE_SZ * db[1]}; - - unsigned sgemmDFG = __hpvm__node(mysgemmNT, 2, 2, db[0], db[1], dg[0] / db[0], - dg[1] / db[1], 12, A, bytesA, lda, B, bytesB, - ldb, C, bytesC, ldc, k, alpha, beta, 0); - __hpvm__wait(sgemmDFG); -} - -int main(int argc, char *argv[]) { - - struct pb_Parameters *params; - struct pb_TimerSet timers; - - size_t A_sz, B_sz, C_sz; - int matArow, matAcol; - int matBrow, matBcol; - std::vector<float> matA, matBT; - - pb_InitializeTimerSet(&timers); - __hpvm__init(); - - /* Read command line. Expect 3 inputs: A, B and B^T - in column-major layout*/ - params = pb_ReadParameters(&argc, argv); - if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) || - (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) { - fprintf(stderr, "Expecting three input filenames\n"); - exit(-1); - } - - /* Read in data */ - pb_SwitchToTimer(&timers, pb_TimerID_IO); - - // load A - readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA); - - // load B^T - readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT); - - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - // copy A to device memory - A_sz = matArow * matAcol * sizeof(float); - B_sz = matBrow * matBcol * sizeof(float); - - // allocate space for C - C_sz = matArow * matBcol * sizeof(float); - - // OpenCL memory allocation - std::vector<float> matC(matArow * matBcol); - - llvm_hpvm_track_mem(&matA.front(), A_sz); - llvm_hpvm_track_mem(&matBT.front(), B_sz); - llvm_hpvm_track_mem(&matC.front(), C_sz); - // Copy A and B^T into device memory - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - - for (size_t i = 0; i < matC.size(); i++) - matC[i] = 0.0f; - - pb_SwitchToTimer(&timers, pb_TimerID_NONE); - - // Use standard sgemm interface - basicSgemm('N', 'T', matArow, matBcol, matAcol, 1.0f, &matA.front(), A_sz, - matArow, &matBT.front(), B_sz, matBcol, 0.0f, &matC.front(), C_sz, - matArow); - - if (params->outFile) { - pb_SwitchToTimer(&timers, pb_TimerID_COPY); - - /* Write C to file */ - llvm_hpvm_request_mem(&matC.front(), C_sz); - pb_SwitchToTimer(&timers, pb_TimerID_IO); - writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC); - } - - pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK); - llvm_hpvm_untrack_mem(&matA.front()); - llvm_hpvm_untrack_mem(&matBT.front()); - llvm_hpvm_untrack_mem(&matC.front()); - pb_SwitchToTimer(&timers, pb_TimerID_NONE); - - double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL])); - std::cout << "GFLOPs = " << 2. * matArow * matBcol * matAcol / GPUtime / 1e9 - << std::endl; - pb_PrintTimerSet(&timers); - __hpvm__cleanup(); - pb_FreeParameters(params); - - return 0; -} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/Makefile deleted file mode 100644 index 2234bf54e1e665f95b38dd0e25c2fe1b5539ce4e..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# (c) 2010 The Board of Trustees of the University of Illinois. - -LANGUAGE=hpvm -SRCDIR_OBJS=io.ll #compute_gold.o -HPVM_OBJS=main.hpvm.ll -APP_CUDALDFLAGS=-lm -lstdc++ -APP_CFLAGS=-ffast-math -O3 -APP_CXXFLAGS=-ffast-math -O3 diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/io.cc deleted file mode 100644 index 04744f404ebaf6e669c2bbe91600519742b57dc9..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/io.cc +++ /dev/null @@ -1,84 +0,0 @@ -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -/* I/O routines for reading and writing matrices in column-major - * layout - */ - -#include <fstream> -#include <iostream> -#include <vector> - -char *readFile(const char *fileName) { - std::fstream f(fileName, std::fstream::in); - if (!f.good()) { - std::cerr << "Error Reading File!!" << std::endl; - return NULL; - } - - f.seekg(0, std::ios::end); - int length = f.tellg(); - f.seekg(0, std::ios::beg); - - char *buffer; - - if (length > 0) { - buffer = new char[length]; - f.read(buffer, length); - buffer[length - 1] = 0; - } else { - buffer = new char; - buffer[0] = 0; - } - - f.close(); - - return buffer; -} - -bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, - std::vector<float> &v) { - std::cerr << "Opening file:" << fn << std::endl; - std::fstream f(fn, std::fstream::in); - if (!f.good()) { - return false; - } - - // Read # of rows and cols - f >> nr_row; - f >> nr_col; - - float data; - std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; - while (f.good()) { - f >> data; - v.push_back(data); - } - v.pop_back(); // remove the duplicated last element - return true; -} - -bool writeColMajorMatrixFile(const char *fn, int nr_row, int nr_col, - std::vector<float> &v) { - std::cerr << "Opening file:" << fn << " for write." << std::endl; - std::fstream f(fn, std::fstream::out); - if (!f.good()) { - return false; - } - - // Read # of rows and cols - f << nr_row << " " << nr_col << " "; - - float data; - std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; - for (int i = 0; i < v.size(); ++i) { - f << v[i] << ' '; - } - f << "\n"; - return true; -} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/kernel.cl b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/kernel.cl deleted file mode 100644 index 7530a400759e2d6db6ffd466c3f6aaf9dfab2117..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/kernel.cl +++ /dev/null @@ -1,53 +0,0 @@ -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -/* - * Kernel of dense matrix-matrix multiplication kernel. - */ - -__kernel void mysgemmNT( __global float *A, size_t bytesA, int lda, __global float *B, size_t bytesB, int ldb, __global float* C, size_t bytesC, int ldc, int k, float alpha, float beta ) -{ -/* - // Partial results - float c[8]; - for (int i=0; i < 8; i++) - c[i] = 0.0f; - float a[8]; - float b[8]; - - int m = get_global_id(0) * 8; - int n = get_global_id(1); - - for (int i = 0; i < k; ++i) { - for (int id = 0; id < 8; id++) { - a[id] = A[m + id + i * lda]; - b[id] = B[n + i * ldb]; - c[id] += a[id] * b[id]; - } - } - - for (int id = 0; id < 8; id++) - C[m+id+n*ldc] = C[m+id+n*ldc] * beta + alpha * c[id]; -*/ - - // Partial results - float8 cp = (float8)(0.0f); - - int m = get_global_id(0) * 8; - int n = get_global_id(1); - - for (int i = 0; i < k; ++i) { - float8 a = vload8(0, A + (m + i * lda)); - float8 b = (float8)(B[n + i * ldb]); - cp += a * b; - } - - float8 c = vload8(0, C + (m+n*ldc)); - c = c * beta + alpha * cp; - vstore8(c, 0, C + (m+n*ldc)); -} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/main.cc deleted file mode 100644 index 286297d6fefe0b6f72bdc9e8a9079a131a7b16bf..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/main.cc +++ /dev/null @@ -1,189 +0,0 @@ -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -/* - * Main entry of dense matrix-matrix multiplication kernel - */ - -#include <hpvm.h> -#include <iostream> -#include <malloc.h> -#include <math.h> -#include <parboil.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/time.h> -#include <vector> - -// I/O routines -extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, - std::vector<float> &v); -extern bool writeColMajorMatrixFile(const char *fn, int, int, - std::vector<float> &); -extern char *readFile(const char *); - -// Parameters of tile sizes -#define TILE_SZ 16 -#define VEC_SZ 8 - -#define CHECK_ERROR(errorMessage) \ - if (clStatus != CL_SUCCESS) { \ - std::cout << errorMessage << " Error!\n"; \ - std::cout << "Line: " << __LINE__ << "\n"; \ - exit(1); \ - } - -void mysgemmNT(float *A, int lda, float *B, int ldb, float *C, int ldc, int k, - float alpha, float beta) { - __hpvm__hint(hpvm::GPU_TARGET); - __hpvm__attributes(3, A, B, C, 1, C); - - float c = 0.0f; - int m = get_global_id(0); - int n = get_global_id(1); - - for (int i = 0; i < k; ++i) { - float a = A[m + i * lda]; - float b = B[n + i * ldb]; - c += a * b; - } - C[m + n * ldc] = C[m + n * ldc] * beta + alpha * c; - /* - Will be substituted by this kernel at the llvm level - // Partial results - float8 cp = (float8)(0.0f); - - int m = get_global_id(0) * 8; - int n = get_global_id(1); - - for (int i = 0; i < k; ++i) { - float8 a = vload8(0, A + (m + i * lda)); - float8 b = (float8)(B[n + i * ldb]); - cp += a * b; - } - - float8 c = vload8(0, C + (m+n*ldc)); - c = c * beta + alpha * cp; - vstore8(c, 0, C + (m+n*ldc)); - */ -} - -__attribute__((noinline)) void basicSgemm(char transa, char transb, int m, - int n, int k, float alpha, float *A, - size_t bytesA, int lda, float *B, - size_t bytesB, int ldb, float beta, - float *C, size_t bytesC, int ldc) { - if ((transa != 'N') && (transa != 'n')) { - std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl; - return; - } - - if ((transb != 'T') && (transb != 't')) { - std::cerr << "unsupported value of 'transb' in regtileSgemm()" << std::endl; - return; - } - - // In this code we assume the matrix sizes are multiple of tile size - if ((m % TILE_SZ) || (n % TILE_SZ)) { - std::cerr << "unsupported size of matrix. m should be multiple of " - << TILE_SZ << "; n should be multiple of " << TILE_SZ - << std::endl; - } - - unsigned db[2] = {TILE_SZ / VEC_SZ, TILE_SZ}; - unsigned dg[2] = {m / TILE_SZ * db[0], n / TILE_SZ * db[1]}; - - unsigned sgemmDFG = __hpvm__node(mysgemmNT, 2, 2, db[0], db[1], dg[0] / db[0], - dg[1] / db[1], 12, A, bytesA, lda, B, bytesB, - ldb, C, bytesC, ldc, k, alpha, beta, 0); - __hpvm__wait(sgemmDFG); -} - -int main(int argc, char *argv[]) { - - struct pb_Parameters *params; - struct pb_TimerSet timers; - - size_t A_sz, B_sz, C_sz; - int matArow, matAcol; - int matBrow, matBcol; - std::vector<float> matA, matBT; - - /* Read command line. Expect 3 inputs: A, B and B^T - in column-major layout*/ - params = pb_ReadParameters(&argc, argv); - if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) || - (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) { - fprintf(stderr, "Expecting three input filenames\n"); - exit(-1); - } - - /* Read in data */ - // load A - readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA); - - // load B^T - readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT); - - pb_InitializeTimerSet(&timers); - __hpvm__init(); - - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - // copy A to device memory - A_sz = matArow * matAcol * sizeof(float); - B_sz = matBrow * matBcol * sizeof(float); - - // allocate space for C - C_sz = matArow * matBcol * sizeof(float); - - // OpenCL memory allocation - std::vector<float> matC(matArow * matBcol); - - llvm_hpvm_track_mem(&matA.front(), A_sz); - llvm_hpvm_track_mem(&matBT.front(), B_sz); - llvm_hpvm_track_mem(&matC.front(), C_sz); - // Copy A and B^T into device memory - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - - for (size_t i = 0; i < matC.size(); i++) - matC[i] = 0.0f; - - pb_SwitchToTimer(&timers, pb_TimerID_NONE); - - // Use standard sgemm interface - basicSgemm('N', 'T', matArow, matBcol, matAcol, 1.0f, &matA.front(), A_sz, - matArow, &matBT.front(), B_sz, matBcol, 0.0f, &matC.front(), C_sz, - matArow); - - pb_SwitchToTimer(&timers, pb_TimerID_COPY); - llvm_hpvm_request_mem(&matC.front(), C_sz); - - pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK); - llvm_hpvm_untrack_mem(&matA.front()); - llvm_hpvm_untrack_mem(&matBT.front()); - llvm_hpvm_untrack_mem(&matC.front()); - pb_SwitchToTimer(&timers, pb_TimerID_NONE); - - pb_PrintTimerSet(&timers); - __hpvm__cleanup(); - - if (params->outFile) { - - /* Write C to file */ - // pb_SwitchToTimer(&timers, pb_TimerID_IO); - writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC); - } - - double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL])); - std::cout << "GFLOPs = " << 2. * matArow * matBcol * matAcol / GPUtime / 1e9 - << std::endl; - pb_FreeParameters(params); - - return 0; -} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/Makefile deleted file mode 100644 index 2234bf54e1e665f95b38dd0e25c2fe1b5539ce4e..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# (c) 2010 The Board of Trustees of the University of Illinois. - -LANGUAGE=hpvm -SRCDIR_OBJS=io.ll #compute_gold.o -HPVM_OBJS=main.hpvm.ll -APP_CUDALDFLAGS=-lm -lstdc++ -APP_CFLAGS=-ffast-math -O3 -APP_CXXFLAGS=-ffast-math -O3 diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/io.cc deleted file mode 100644 index 04744f404ebaf6e669c2bbe91600519742b57dc9..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/io.cc +++ /dev/null @@ -1,84 +0,0 @@ -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -/* I/O routines for reading and writing matrices in column-major - * layout - */ - -#include <fstream> -#include <iostream> -#include <vector> - -char *readFile(const char *fileName) { - std::fstream f(fileName, std::fstream::in); - if (!f.good()) { - std::cerr << "Error Reading File!!" << std::endl; - return NULL; - } - - f.seekg(0, std::ios::end); - int length = f.tellg(); - f.seekg(0, std::ios::beg); - - char *buffer; - - if (length > 0) { - buffer = new char[length]; - f.read(buffer, length); - buffer[length - 1] = 0; - } else { - buffer = new char; - buffer[0] = 0; - } - - f.close(); - - return buffer; -} - -bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, - std::vector<float> &v) { - std::cerr << "Opening file:" << fn << std::endl; - std::fstream f(fn, std::fstream::in); - if (!f.good()) { - return false; - } - - // Read # of rows and cols - f >> nr_row; - f >> nr_col; - - float data; - std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; - while (f.good()) { - f >> data; - v.push_back(data); - } - v.pop_back(); // remove the duplicated last element - return true; -} - -bool writeColMajorMatrixFile(const char *fn, int nr_row, int nr_col, - std::vector<float> &v) { - std::cerr << "Opening file:" << fn << " for write." << std::endl; - std::fstream f(fn, std::fstream::out); - if (!f.good()) { - return false; - } - - // Read # of rows and cols - f << nr_row << " " << nr_col << " "; - - float data; - std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl; - for (int i = 0; i < v.size(); ++i) { - f << v[i] << ' '; - } - f << "\n"; - return true; -} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/kernel.cl b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/kernel.cl deleted file mode 100644 index cc6e708148f40c80186004d3febd66988c67ae37..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/kernel.cl +++ /dev/null @@ -1,86 +0,0 @@ -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -/* - * Kernel of dense matrix-matrix multiplication kernel. - */ - -// Parameters of tile sizes -#define TILE_N 8 -#define TILE_TB_HEIGHT 8 -#define TILE_M (TILE_N*TILE_TB_HEIGHT) - -__kernel void mysgemmNT( __global const float *A, int lda, __global const float *B, int ldb, __global float* C, int ldc, int k, float alpha, float beta ) -{ - - float c[TILE_N]; - for (int i=0; i < TILE_N; i++) - c[i] = 0.0f; - - int mid = get_local_id(1)*get_local_size(0)+get_local_id(0); - int m = get_group_id(0) * TILE_M + mid; - - int b_base = 0; - - for (int i = 0; i < k; i+=TILE_TB_HEIGHT) { - float a; - b_base = get_group_id(1) * TILE_N + i * ldb; - - for (int j = 0; j < TILE_TB_HEIGHT; j++) { - a = A[m + (i+j)*lda]; - for (int kk = 0; kk < TILE_N; kk++) - c[kk] += a * B[b_base + j * ldb + kk]; - - } - } - int t = ldc * get_group_id(1) * TILE_N + m; - for (int i = 0; i < TILE_N; i++) { - C[t+i*ldc] = C[t+i*ldc] * beta + alpha * c[i]; - } -/* - Will be substituted by this kernel at the llvm level - - // Partial results - floatn cp = (floatn)(0.0f); - - int mid = get_local_id(1)*get_local_size(0)+get_local_id(0); - int m = get_group_id(0) * TILE_M + mid; - - int b_base = 0; - - for (int i = 0; i < k; i+=TILE_TB_HEIGHT) { - float a; - b_base = get_group_id(1) * TILE_N + i * ldb; - - for (int j = 0; j < TILE_TB_HEIGHT; j++) { - a = A[m + (i+j)*lda]; - cp += a * vloadn(0, B + b_base + j * ldb); - } - } - - cp = alpha * cp; - float c[TILE_N]; - c[0] = cp.s0; - c[1] = cp.s1; - c[2] = cp.s2; - c[3] = cp.s3; - c[4] = cp.s4; - c[5] = cp.s5; - c[6] = cp.s6; - c[7] = cp.s7; - - int t = ldc * get_group_id(1) * TILE_N + m; - for (int i = 0; i < TILE_N; i++) { - C[t+i*ldc] = C[t+i*ldc] * beta + c[i]; - } - -*/ - -*/ - -} diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/main.cc deleted file mode 100644 index 8fbc45e08a9e2fd1e3af6cc03360086b354665d7..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/main.cc +++ /dev/null @@ -1,227 +0,0 @@ -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -/* - * Main entry of dense matrix-matrix multiplication kernel - */ - -#include <hpvm.h> -#include <iostream> -#include <malloc.h> -#include <math.h> -#include <parboil.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/time.h> -#include <vector> - -// I/O routines -extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, - std::vector<float> &v); -extern bool writeColMajorMatrixFile(const char *fn, int, int, - std::vector<float> &); -extern char *readFile(const char *); - -// Parameters of tile sizes -#define TILE_N 8 -#define TILE_TB_HEIGHT 8 -#define TILE_M (TILE_N * TILE_TB_HEIGHT) - -#define CHECK_ERROR(errorMessage) \ - if (clStatus != CL_SUCCESS) { \ - std::cout << errorMessage << " Error!\n"; \ - std::cout << "Line: " << __LINE__ << "\n"; \ - exit(1); \ - } - -void mysgemmNT(float *A, int lda, float *B, int ldb, float *C, int ldc, int k, - float alpha, float beta) { - __hpvm__hint(hpvm::SPIR_TARGET); - __hpvm__attributes(3, A, B, C, 1, C); - - float c[TILE_N]; - for (int i = 0; i < TILE_N; i++) - c[i] = 0.0f; - - int mid = get_local_id(1) * get_local_size(0) + get_local_id(0); - int m = get_group_id(0) * TILE_M + mid; - - int b_base = 0; - - for (int i = 0; i < k; i += TILE_TB_HEIGHT) { - float a; - b_base = get_group_id(1) * TILE_N + i * ldb; - - for (int j = 0; j < TILE_TB_HEIGHT; j++) { - a = A[m + (i + j) * lda]; - for (int kk = 0; kk < TILE_N; kk++) - c[kk] += a * B[b_base + j * ldb + kk]; - } - } - int t = ldc * get_group_id(1) * TILE_N + m; - for (int i = 0; i < TILE_N; i++) { - C[t + i * ldc] = C[t + i * ldc] * beta + alpha * c[i]; - } - /* - Will be substituted by this kernel at the llvm level - - // Partial results - floatn cp = (floatn)(0.0f); - - int mid = get_local_id(1)*get_local_size(0)+get_local_id(0); - int m = get_group_id(0) * TILE_M + mid; - - int b_base = 0; - - for (int i = 0; i < k; i+=TILE_TB_HEIGHT) { - float a; - b_base = get_group_id(1) * TILE_N + i * ldb; - - for (int j = 0; j < TILE_TB_HEIGHT; j++) { - a = A[m + (i+j)*lda]; - cp += a * vloadn(0, B + b_base + j * ldb); - } - } - - cp = alpha * cp; - float c[TILE_N]; - c[0] = cp.s0; - c[1] = cp.s1; - c[2] = cp.s2; - c[3] = cp.s3; - c[4] = cp.s4; - c[5] = cp.s5; - c[6] = cp.s6; - c[7] = cp.s7; - - int t = ldc * get_group_id(1) * TILE_N + m; - for (int i = 0; i < TILE_N; i++) { - C[t+i*ldc] = C[t+i*ldc] * beta + c[i]; - } - - */ -} - -__attribute__((noinline)) void basicSgemm(char transa, char transb, int m, - int n, int k, float alpha, float *A, - size_t bytesA, int lda, float *B, - size_t bytesB, int ldb, float beta, - float *C, size_t bytesC, int ldc) { - if ((transa != 'N') && (transa != 'n')) { - std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl; - return; - } - - if ((transb != 'T') && (transb != 't')) { - std::cerr << "unsupported value of 'transb' in regtileSgemm()" << std::endl; - return; - } - - // In this code we assume the matrix sizes are multiple of tile size - if ((m % TILE_M) || (n % TILE_N)) { - std::cerr << "unsupported size of matrix. m should be multiple of " - << TILE_M << "; n should be multiple of " << TILE_N << std::endl; - return; - } - - // unsigned db[2] = {TILE_SZ/VEC_SZ,TILE_SZ}; - // unsigned dg[2] = {m/TILE_SZ*db[0],n/TILE_SZ*db[1]}; - unsigned db[2] = {TILE_N, TILE_TB_HEIGHT}; - unsigned dg[2] = {m * TILE_N / TILE_M, n * TILE_TB_HEIGHT / TILE_N}; - - void *sgemmDFG = __hpvm__node(mysgemmNT, 2, 2, db[0], db[1], dg[0] / db[0], - dg[1] / db[1], 12, A, bytesA, lda, B, bytesB, - ldb, C, bytesC, ldc, k, alpha, beta, 0); - __hpvm__wait(sgemmDFG); -} - -int main(int argc, char *argv[]) { - - struct pb_Parameters *params; - struct pb_TimerSet timers; - - size_t A_sz, B_sz, C_sz; - int matArow, matAcol; - int matBrow, matBcol; - std::vector<float> matA, matBT; - - /* Read command line. Expect 3 inputs: A, B and B^T - in column-major layout*/ - params = pb_ReadParameters(&argc, argv); - if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) || - (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) { - fprintf(stderr, "Expecting three input filenames\n"); - exit(-1); - } - - /* Read in data */ - // load A - readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA); - - // load B^T - readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT); - - pb_InitializeTimerSet(&timers); - __hpvm__init(); - - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - // copy A to device memory - A_sz = matArow * matAcol * sizeof(float); - B_sz = matBrow * matBcol * sizeof(float); - - // allocate space for C - C_sz = matArow * matBcol * sizeof(float); - - // OpenCL memory allocation - std::vector<float> matC(matArow * matBcol); - - pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_TRACK); - llvm_hpvm_track_mem(&matA.front(), A_sz); - llvm_hpvm_track_mem(&matBT.front(), B_sz); - llvm_hpvm_track_mem(&matC.front(), C_sz); - - // Copy A and B^T into device memory - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - - for (size_t i = 0; i < matC.size(); i++) - matC[i] = 0.0f; - - pb_SwitchToTimer(&timers, pb_TimerID_NONE); - - // Use standard sgemm interface - basicSgemm('N', 'T', matArow, matBcol, matAcol, 1.0f, &matA.front(), A_sz, - matArow, &matBT.front(), B_sz, matBcol, 0.0f, &matC.front(), C_sz, - matArow); - - pb_SwitchToTimer(&timers, pb_TimerID_COPY); - llvm_hpvm_request_mem(&matC.front(), C_sz); - - pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK); - llvm_hpvm_untrack_mem(&matA.front()); - llvm_hpvm_untrack_mem(&matBT.front()); - llvm_hpvm_untrack_mem(&matC.front()); - pb_SwitchToTimer(&timers, pb_TimerID_NONE); - - pb_PrintTimerSet(&timers); - __hpvm__cleanup(); - - if (params->outFile) { - - /* Write C to file */ - // pb_SwitchToTimer(&timers, pb_TimerID_IO); - writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC); - } - - double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL])); - std::cout << "GFLOPs = " << 2. * matArow * matBcol * matAcol / GPUtime / 1e9 - << std::endl; - pb_FreeParameters(params); - - return 0; -} diff --git a/hpvm/test/parboil/benchmarks/stencil/src/hpvm_vec/common.h b/hpvm/test/parboil/benchmarks/stencil/src/hpvm_vec/common.h deleted file mode 100644 index 12a6d131c29067073fa79f09c4e6f91b8662969c..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/stencil/src/hpvm_vec/common.h +++ /dev/null @@ -1,15 +0,0 @@ -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -#ifndef _COMMON_H_ -#define _COMMON_H_ -//#define Index3D(_nx,_ny,_i,_j,_k) ((_i)+_nx*((_j)+_ny*(_k))) -// +3 for padding -#define Index3D(_nx, _ny, _i, _j, _k) ((_i) + _nx * ((_j) + _ny * (_k)) + 3) -#define TCF 4 -#endif diff --git a/hpvm/test/parboil/benchmarks/stencil/src/hpvm_vec/stencil.c b/hpvm/test/parboil/benchmarks/stencil/src/hpvm_vec/stencil.c deleted file mode 100644 index 35c5ed960c2031b0b84124bbdd1aeb95042625ee..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/benchmarks/stencil/src/hpvm_vec/stencil.c +++ /dev/null @@ -1,176 +0,0 @@ - -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -#include "common.h" -#include "file.h" -#include <hpvm.h> -#include <parboil.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -static int read_data(float *A0, int nx, int ny, int nz, FILE *fp) { - int s = 0; - int i, j, k; - for (i = 0; i < nz; i++) { - for (j = 0; j < ny; j++) { - for (k = 0; k < nx; k++) { - fread(A0 + s, sizeof(float), 1, fp); - s++; - } - } - } - return 0; -} - -void naive_kernel(float c0, float c1, float *A0, float *Anext, int nx, int ny, - int nz) { - __hpvm__attributes(2, A0, Anext, 1, Anext); - int i = get_global_id(0) + 1; - int j = get_global_id(1) + 1; - int k = get_global_id(2) + 1; - - if (i < nx - 1) { - Anext[Index3D(nx, ny, i, j, k)] = c1 * (A0[Index3D(nx, ny, i, j, k + 1)] + - A0[Index3D(nx, ny, i, j, k - 1)] + - A0[Index3D(nx, ny, i, j + 1, k)] + - A0[Index3D(nx, ny, i, j - 1, k)] + - A0[Index3D(nx, ny, i + 1, j, k)] + - A0[Index3D(nx, ny, i - 1, j, k)]) - - A0[Index3D(nx, ny, i, j, k)] * c0; - } -} - -int main(int argc, char **argv) { - struct pb_TimerSet timers; - struct pb_Parameters *parameters; - - printf("OpenCL accelerated 7 points stencil codes****\n"); - printf("Author: Li-Wen Chang <lchang20@illinois.edu>\n"); - parameters = pb_ReadParameters(&argc, argv); - - /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/ - - // declaration - int nx, ny, nz; - size_t size; - int iteration; - float c0 = 1.0 / 6.0; - float c1 = 1.0 / 6.0 / 6.0; - - if (argc < 5) { - printf("Usage: probe nx ny nz t\n" - "nx: the grid size x\n" - "ny: the grid size y\n" - "nz: the grid size z\n" - "t: the iteration time\n"); - return -1; - } - - nx = atoi(argv[1]); - if (nx < 1) - return -1; - ny = atoi(argv[2]); - if (ny < 1) - return -1; - nz = atoi(argv[3]); - if (nz < 1) - return -1; - iteration = atoi(argv[4]); - if (iteration < 1) - return -1; - - // host data - float *h_A0; - float *h_Anext; - - // load data from files - - size = nx * ny * nz; - - // Padding in the beginning to get aligned loads and stores - size = size + 3; - - h_A0 = (float *)malloc(sizeof(float) * size); - h_Anext = (float *)malloc(sizeof(float) * size); - - /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/ - FILE *fp = fopen(parameters->inpFiles[0], "rb"); - read_data(h_A0 + 3, nx, ny, nz, fp); - fclose(fp); - - pb_InitializeTimerSet(&timers); - __hpvm__init(); - - pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_TRACK); - llvm_hpvm_track_mem(h_A0, sizeof(float) * size); - llvm_hpvm_track_mem(h_Anext, sizeof(float) * size); - - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - - memcpy(h_Anext, h_A0, sizeof(float) * size); - - // only use 1D thread block - int tx = 256 / TCF; - int block[3] = {tx, 1, 1}; - int grid[3] = {(nx - 2 + TCF * tx - 1) / (TCF * tx) * tx, ny - 2, nz - 2}; - // size_t grid[3] = {nx-2,ny-2,nz-2}; - size_t offset[3] = {1, 1, 1}; - - printf("grid(%d, %d, %d), block(%d, %d, %d)\n", grid[0], grid[1], grid[2], - block[0], block[1], block[2]); - // main execution - - int t; - size_t bytes = size * sizeof(float); - printf("A[126,1,1] = %f\n", h_A0[Index3D(nx, ny, 126, 1, 1)]); - printf("A[125,1,1] = %f\n", h_A0[Index3D(nx, ny, 125, 1, 1)]); - for (t = 0; t < iteration; t++) { - pb_SwitchToTimer(&timers, pb_TimerID_NONE); - unsigned stencilDFG = __hpvm__node( - naive_kernel, 2, 3, block[0], block[1], block[2], grid[0] / block[0], - grid[1] / block[1], grid[2] / block[2], 9, (float)c0, (float)c1, h_A0, - bytes, h_Anext, bytes, nx, ny, nz, 0); - __hpvm__wait(stencilDFG); - // printf("iteration %d\n",t); - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - float *h_temp = h_A0; - h_A0 = h_Anext; - h_Anext = h_temp; - } - - float *h_temp = h_A0; - h_A0 = h_Anext; - h_Anext = h_temp; - pb_SwitchToTimer(&timers, pb_TimerID_COPY); - llvm_hpvm_request_mem(h_Anext, bytes); - printf("A[126,1,1] = %f\n", h_Anext[Index3D(nx, ny, 126, 1, 1)]); - printf("A[125,1,1] = %f\n", h_Anext[Index3D(nx, ny, 125, 1, 1)]); - - pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK); - - llvm_hpvm_untrack_mem(h_A0); - llvm_hpvm_untrack_mem(h_Anext); - - pb_SwitchToTimer(&timers, pb_TimerID_NONE); - pb_PrintTimerSet(&timers); - - __hpvm__cleanup(); - - if (parameters->outFile) { - /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/ - outputData(parameters->outFile, h_Anext + 3, nx, ny, nz); - } - /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/ - free(h_A0); - free(h_Anext); - pb_FreeParameters(parameters); - - return 0; -}