From 5c5fcb3a9c206e49816c8a532f890029a8c2cbf5 Mon Sep 17 00:00:00 2001
From: Akash Kothari <akashk4@tyler.cs.illinois.edu>
Date: Mon, 13 Jan 2020 08:24:53 -0600
Subject: [PATCH] Adding ported sgemm's visc version benchmark

---
 .../parboil/benchmarks/sgemm/src/visc/main.cc | 128 ++++++++++++++++--
 1 file changed, 117 insertions(+), 11 deletions(-)

diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc
index 6f4c8633fb..22d9098646 100644
--- a/hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc
+++ b/hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc
@@ -37,14 +37,36 @@ extern char* readFile(const char*);
      exit(1);                               \
   }
 
-void mysgemmNT( float* A, int lda, float* B, int ldb, float* C, int ldc, int k, float alpha, float beta )
-{
+typedef struct __attribute__((__packed__)) {
+    float* A; size_t bytes_A;
+    int lda;
+    float* B; size_t bytes_B;
+    int ldb;
+    float* C; size_t bytes_C;
+    int ldc;
+    int k; float alpha; float beta;
+    size_t dim_X1, dim_Y1, dim_X2, dim_Y2;
+} RootIn;
+
+void mysgemmNT(
+    float* A, size_t bytes_A, int lda, float* B, size_t bytes_B, int ldb, float* C, size_t bytes_C,
+    int ldc, int k, float alpha, float beta
+) {
     __visc__hint(visc::DEVICE);
     __visc__attributes(3, A, B, C, 1, C);
-    float c = 0.0f;
-    int m = get_global_id(0);
-    int n = get_global_id(1);
 
+    void* thisNode = __visc__getNode();
+    void* parentNode = __visc__getParentNode(thisNode);
+    int lx = __visc__getNodeInstanceID_x(thisNode);
+    int ly = __visc__getNodeInstanceID_y(thisNode);
+    int gx = __visc__getNodeInstanceID_x(parentNode);
+    int gy = __visc__getNodeInstanceID_y(parentNode);
+    int gridx = __visc__getNumNodeInstances_x(thisNode);
+    int gridy = __visc__getNumNodeInstances_y(thisNode);
+    int m = gx * gridx + lx;
+    int n = gy * gridy + ly;
+
+    float c = 0.0f;
     for (int i = 0; i < k; ++i) {
         float a = A[m + i * lda];
         float b = B[n + i * ldb];
@@ -53,8 +75,83 @@ void mysgemmNT( float* A, int lda, float* B, int ldb, float* C, int ldc, int k,
     C[m+n*ldc] = C[m+n*ldc] * beta + alpha * c;
 }
 
-__attribute__((noinline)) void basicSgemm( char transa, char transb, int m, int n, int k, float alpha, float* A, size_t bytesA, int lda, float* B, size_t bytesB, int ldb, float beta, float* C, size_t bytesC, int ldc )
-{
+void basicSgemmLvl1(
+    float* A, size_t bytes_A, int lda, float* B, size_t bytes_B, int ldb, float* C, size_t bytes_C, int ldc,
+    int k, float alpha, float beta, size_t dim_X1, size_t dim_Y1
+) {
+    __visc__hint(visc::DEVICE);
+    __visc__attributes(3, A, B, C, 1, C);
+    void* sgemm_node = __visc__createNodeND(2, mysgemmNT, (size_t) dim_X1, (size_t) dim_Y1);
+    __visc__bindIn(sgemm_node, 0, 0, 0);
+    __visc__bindIn(sgemm_node, 1, 1, 0);
+    __visc__bindIn(sgemm_node, 2, 2, 0);
+    __visc__bindIn(sgemm_node, 3, 3, 0);
+    __visc__bindIn(sgemm_node, 4, 4, 0);
+    __visc__bindIn(sgemm_node, 5, 5, 0);
+    __visc__bindIn(sgemm_node, 6, 6, 0);
+    __visc__bindIn(sgemm_node, 7, 7, 0);
+    __visc__bindIn(sgemm_node, 8, 8, 0);
+    __visc__bindIn(sgemm_node, 9, 9, 0);
+    __visc__bindIn(sgemm_node, 10, 10, 0);
+    __visc__bindIn(sgemm_node, 11, 11, 0);
+}
+
+void basicSgemmLvl2(
+    float* A, size_t bytes_A, int lda, float* B, size_t bytes_B, int ldb, float* C, size_t bytes_C, int ldc,
+    int k, float alpha, float beta,
+    size_t dim_X1, size_t dim_Y1, size_t dim_X2, size_t dim_Y2
+) {
+    __visc__hint(visc::CPU_TARGET);
+    __visc__attributes(3, A, B, C, 1, C);
+    void* sgemm_node = __visc__createNodeND(2, basicSgemmLvl1, (size_t) dim_X2, (size_t) dim_Y2);
+    __visc__bindIn(sgemm_node, 0, 0, 0);
+    __visc__bindIn(sgemm_node, 1, 1, 0);
+    __visc__bindIn(sgemm_node, 2, 2, 0);
+    __visc__bindIn(sgemm_node, 3, 3, 0);
+    __visc__bindIn(sgemm_node, 4, 4, 0);
+    __visc__bindIn(sgemm_node, 5, 5, 0);
+    __visc__bindIn(sgemm_node, 6, 6, 0);
+    __visc__bindIn(sgemm_node, 7, 7, 0);
+    __visc__bindIn(sgemm_node, 8, 8, 0);
+    __visc__bindIn(sgemm_node, 9, 9, 0);
+    __visc__bindIn(sgemm_node, 10, 10, 0);
+    __visc__bindIn(sgemm_node, 11, 11, 0);
+    __visc__bindIn(sgemm_node, 12, 12, 0);
+    __visc__bindIn(sgemm_node, 13, 13, 0);
+}
+
+// A wrapper level used in codegen for some backends
+void basicSgemmLvl3(
+    float* A, size_t bytes_A, int lda, float* B, size_t bytes_B, int ldb, float* C, size_t bytes_C, int ldc,
+    int k, float alpha, float beta,
+    size_t dim_X1, size_t dim_Y1, size_t dim_X2, size_t dim_Y2
+) {
+    __visc__hint(visc::CPU_TARGET);
+    __visc__attributes(3, A, B, C, 1, C);
+    void* sgemm_node = __visc__createNodeND(0, basicSgemmLvl2);
+    __visc__bindIn(sgemm_node, 0, 0, 0);
+    __visc__bindIn(sgemm_node, 1, 1, 0);
+    __visc__bindIn(sgemm_node, 2, 2, 0);
+    __visc__bindIn(sgemm_node, 3, 3, 0);
+    __visc__bindIn(sgemm_node, 4, 4, 0);
+    __visc__bindIn(sgemm_node, 5, 5, 0);
+    __visc__bindIn(sgemm_node, 6, 6, 0);
+    __visc__bindIn(sgemm_node, 7, 7, 0);
+    __visc__bindIn(sgemm_node, 8, 8, 0);
+    __visc__bindIn(sgemm_node, 9, 9, 0);
+    __visc__bindIn(sgemm_node, 10, 10, 0);
+    __visc__bindIn(sgemm_node, 11, 11, 0);
+    __visc__bindIn(sgemm_node, 12, 12, 0);
+    __visc__bindIn(sgemm_node, 13, 13, 0);
+    __visc__bindIn(sgemm_node, 14, 14, 0);
+    __visc__bindIn(sgemm_node, 15, 15, 0);
+}
+
+__attribute__((noinline)) void basicSgemm(
+    char transa, char transb, int m, int n, int k, float alpha,
+    float* A, size_t bytesA, int lda, float* B, size_t bytesB, int ldb, float beta,
+    float* C, size_t bytesC, int ldc
+) {
     if ((transa != 'N') && (transa != 'n')) {
         std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl;
         return;
@@ -71,10 +168,18 @@ __attribute__((noinline)) void basicSgemm( char transa, char transb, int m, int
                   << "; n should be multiple of " << TILE_SZ << std::endl;
     }
 
-    unsigned long db[2] = {TILE_SZ,TILE_SZ};
-    unsigned long dg[2] = {m/TILE_SZ*db[0],n/TILE_SZ*db[1]};
-
-    void* sgemmDFG = __visc__node(mysgemmNT, 2, 2, db[0], db[1], dg[0]/db[0], dg[1]/db[1], 12, A, bytesA, lda, B, bytesB, ldb, C, bytesC, ldc, k, alpha, beta, 0);
+    size_t db[2] = {TILE_SZ,TILE_SZ}, dg[2] = {m/TILE_SZ*db[0],n/TILE_SZ*db[1]};
+
+    void *root_in = malloc(sizeof(RootIn));
+    RootIn root_in_local = {
+        A, bytesA, lda,
+        B, bytesB, ldb,
+        C, bytesC, ldc,
+        k, alpha, beta,
+        db[0], db[1], dg[0]/db[0], dg[1]/db[1]
+    };
+    *(RootIn *)root_in = root_in_local;
+    void* sgemmDFG = __visc__launch(0, basicSgemmLvl3, root_in);
     __visc__wait(sgemmDFG);
 }
 
@@ -174,3 +279,4 @@ int main (int argc, char *argv[]) {
 
     return 0;
 }
+
-- 
GitLab