diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/Makefile
deleted file mode 100644
index 2234bf54e1e665f95b38dd0e25c2fe1b5539ce4e..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# (c) 2010 The Board of Trustees of the University of Illinois.
-
-LANGUAGE=hpvm
-SRCDIR_OBJS=io.ll #compute_gold.o
-HPVM_OBJS=main.hpvm.ll
-APP_CUDALDFLAGS=-lm -lstdc++
-APP_CFLAGS=-ffast-math -O3
-APP_CXXFLAGS=-ffast-math -O3
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/io.cc
deleted file mode 100644
index 04744f404ebaf6e669c2bbe91600519742b57dc9..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/io.cc
+++ /dev/null
@@ -1,84 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 2010 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/* I/O routines for reading and writing matrices in column-major
- * layout
- */
-
-#include <fstream>
-#include <iostream>
-#include <vector>
-
-char *readFile(const char *fileName) {
-  std::fstream f(fileName, std::fstream::in);
-  if (!f.good()) {
-    std::cerr << "Error Reading File!!" << std::endl;
-    return NULL;
-  }
-
-  f.seekg(0, std::ios::end);
-  int length = f.tellg();
-  f.seekg(0, std::ios::beg);
-
-  char *buffer;
-
-  if (length > 0) {
-    buffer = new char[length];
-    f.read(buffer, length);
-    buffer[length - 1] = 0;
-  } else {
-    buffer = new char;
-    buffer[0] = 0;
-  }
-
-  f.close();
-
-  return buffer;
-}
-
-bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col,
-                            std::vector<float> &v) {
-  std::cerr << "Opening file:" << fn << std::endl;
-  std::fstream f(fn, std::fstream::in);
-  if (!f.good()) {
-    return false;
-  }
-
-  // Read # of rows and cols
-  f >> nr_row;
-  f >> nr_col;
-
-  float data;
-  std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl;
-  while (f.good()) {
-    f >> data;
-    v.push_back(data);
-  }
-  v.pop_back(); // remove the duplicated last element
-  return true;
-}
-
-bool writeColMajorMatrixFile(const char *fn, int nr_row, int nr_col,
-                             std::vector<float> &v) {
-  std::cerr << "Opening file:" << fn << " for write." << std::endl;
-  std::fstream f(fn, std::fstream::out);
-  if (!f.good()) {
-    return false;
-  }
-
-  // Read # of rows and cols
-  f << nr_row << " " << nr_col << " ";
-
-  float data;
-  std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl;
-  for (int i = 0; i < v.size(); ++i) {
-    f << v[i] << ' ';
-  }
-  f << "\n";
-  return true;
-}
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/kernel.cl b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/kernel.cl
deleted file mode 100644
index ae0f5b60f4b800515bd84a04b02926acd625665c..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/kernel.cl
+++ /dev/null
@@ -1,40 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 2010 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/* 
- * Kernel of dense matrix-matrix multiplication kernel.
- */
-
-__kernel void mysgemmNT( __global const float *A, int lda, __global const float *B, int ldb, __global float* C, int ldc, int k, float alpha, float beta )
-{
-    // Partial results 
-    float c[TILE_N];
-    for (int i=0; i < TILE_N; i++)
-	c[i] = 0.0f;
-   
-    int mid = get_local_id(1)*get_local_size(0)+get_local_id(0);
-    int m = get_group_id(0) * TILE_M + mid;
-
-    int b_base = 0;
-
-    for (int i = 0; i < k; i+=TILE_TB_HEIGHT) {
-	float a; 
-        b_base = get_group_id(1) * TILE_N + i * ldb;
-
-	for (int j = 0; j < TILE_TB_HEIGHT; j++) {
-	    a = A[m + (i+j)*lda];
-	    for (int kk = 0; kk < TILE_N; kk++)
-		c[kk] += a * B[b_base + j * ldb + kk];
-
-	}
-    }
-    int t = ldc * get_group_id(1) * TILE_N + m;
-    for (int i = 0; i < TILE_N; i++) {
-	C[t+i*ldc] = C[t+i*ldc] * beta + alpha * c[i];
-    }
-}
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/main.cc
deleted file mode 100644
index a1db2e56a5c5639319d7be5f6a890d44c3a28421..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_opt/main.cc
+++ /dev/null
@@ -1,186 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 2010 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/*
- * Main entry of dense matrix-matrix multiplication kernel
- */
-
-#include <hpvm.h>
-#include <iostream>
-#include <malloc.h>
-#include <math.h>
-#include <parboil.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include <vector>
-
-// I/O routines
-extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col,
-                                   std::vector<float> &v);
-extern bool writeColMajorMatrixFile(const char *fn, int, int,
-                                    std::vector<float> &);
-extern char *readFile(const char *);
-
-// Parameters of tile sizes
-#define TILE_N 16
-#define TILE_TB_HEIGHT 8
-#define TILE_M (TILE_N * TILE_TB_HEIGHT)
-
-#define CHECK_ERROR(errorMessage)                                              \
-  if (clStatus != CL_SUCCESS) {                                                \
-    std::cout << errorMessage << " Error!\n";                                  \
-    std::cout << "Line: " << __LINE__ << "\n";                                 \
-    exit(1);                                                                   \
-  }
-
-void mysgemmNT(float *A, int lda, float *B, int ldb, float *C, int ldc, int k,
-               float alpha, float beta) {
-  __hpvm__hint(hpvm::GPU_TARGET);
-  __hpvm__attributes(3, A, B, C, 1, C);
-
-  float c[TILE_N];
-  for (int i = 0; i < TILE_N; i++)
-    c[i] = 0.0f;
-
-  int mid = get_local_id(1) * get_local_size(0) + get_local_id(0);
-  int m = get_group_id(0) * TILE_M + mid;
-
-  int b_base = 0;
-
-  for (int i = 0; i < k; i += TILE_TB_HEIGHT) {
-    float a;
-    b_base = get_group_id(1) * TILE_N + i * ldb;
-
-    for (int j = 0; j < TILE_TB_HEIGHT; j++) {
-      a = A[m + (i + j) * lda];
-      for (int kk = 0; kk < TILE_N; kk++)
-        c[kk] += a * B[b_base + j * ldb + kk];
-    }
-  }
-  int t = ldc * get_group_id(1) * TILE_N + m;
-  for (int i = 0; i < TILE_N; i++) {
-    C[t + i * ldc] = C[t + i * ldc] * beta + alpha * c[i];
-  }
-}
-
-__attribute__((noinline)) void basicSgemm(char transa, char transb, int m,
-                                          int n, int k, float alpha, float *A,
-                                          size_t bytesA, int lda, float *B,
-                                          size_t bytesB, int ldb, float beta,
-                                          float *C, size_t bytesC, int ldc) {
-  if ((transa != 'N') && (transa != 'n')) {
-    std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl;
-    return;
-  }
-
-  if ((transb != 'T') && (transb != 't')) {
-    std::cerr << "unsupported value of 'transb' in regtileSgemm()" << std::endl;
-    return;
-  }
-
-  // In this code we assume the matrix sizes are multiple of tile size
-  if ((m % TILE_M) || (n % TILE_N)) {
-    std::cerr << "unsupported size of matrix. m should be multiple of "
-              << TILE_M << "; n should be multiple of " << TILE_N << std::endl;
-    return;
-  }
-
-  unsigned db[2] = {TILE_N, TILE_TB_HEIGHT};
-  //    unsigned dg[2] = {m*TILE_N/TILE_M,n*TILE_TB_HEIGHT/TILE_N};
-  unsigned dg[2] = {m * db[0] / TILE_M, n * db[1] / TILE_N};
-
-  unsigned sgemmDFG = __hpvm__node(mysgemmNT, 2, 2, db[0], db[1], dg[0] / db[0],
-                                   dg[1] / db[1], 12, A, bytesA, lda, B, bytesB,
-                                   ldb, C, bytesC, ldc, k, alpha, beta, 0);
-  __hpvm__wait(sgemmDFG);
-}
-
-int main(int argc, char *argv[]) {
-
-  struct pb_Parameters *params;
-  struct pb_TimerSet timers;
-
-  size_t A_sz, B_sz, C_sz;
-  int matArow, matAcol;
-  int matBrow, matBcol;
-  std::vector<float> matA, matBT;
-
-  /* Read command line. Expect 3 inputs: A, B and B^T
-     in column-major layout*/
-  params = pb_ReadParameters(&argc, argv);
-  if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) ||
-      (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) {
-    fprintf(stderr, "Expecting three input filenames\n");
-    exit(-1);
-  }
-
-  /* Read in data */
-  // load A
-  readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA);
-
-  // load B^T
-  readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT);
-
-  pb_InitializeTimerSet(&timers);
-  __hpvm__init();
-
-  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
-  // copy A to device memory
-  A_sz = matArow * matAcol * sizeof(float);
-  B_sz = matBrow * matBcol * sizeof(float);
-
-  // allocate space for C
-  C_sz = matArow * matBcol * sizeof(float);
-
-  // OpenCL memory allocation
-  std::vector<float> matC(matArow * matBcol);
-
-  llvm_hpvm_track_mem(&matA.front(), A_sz);
-  llvm_hpvm_track_mem(&matBT.front(), B_sz);
-  llvm_hpvm_track_mem(&matC.front(), C_sz);
-  // Copy A and B^T into device memory
-  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
-
-  for (size_t i = 0; i < matC.size(); i++)
-    matC[i] = 0.0f;
-
-  pb_SwitchToTimer(&timers, pb_TimerID_NONE);
-
-  // Use standard sgemm interface
-  basicSgemm('N', 'T', matArow, matBcol, matAcol, 1.0f, &matA.front(), A_sz,
-             matArow, &matBT.front(), B_sz, matBcol, 0.0f, &matC.front(), C_sz,
-             matArow);
-
-  pb_SwitchToTimer(&timers, pb_TimerID_COPY);
-  llvm_hpvm_request_mem(&matC.front(), C_sz);
-
-  pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK);
-  llvm_hpvm_untrack_mem(&matA.front());
-  llvm_hpvm_untrack_mem(&matBT.front());
-  llvm_hpvm_untrack_mem(&matC.front());
-  pb_SwitchToTimer(&timers, pb_TimerID_NONE);
-
-  pb_PrintTimerSet(&timers);
-  __hpvm__cleanup();
-
-  if (params->outFile) {
-
-    /* Write C to file */
-    // pb_SwitchToTimer(&timers, pb_TimerID_IO);
-    writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC);
-  }
-
-  double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL]));
-  std::cout << "GFLOPs = " << 2. * matArow * matBcol * matAcol / GPUtime / 1e9
-            << std::endl;
-  pb_FreeParameters(params);
-
-  return 0;
-}
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/Makefile
deleted file mode 100644
index f81bac47072bc017dcdcdccf373cdfbd0f21ceac..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-# (c) 2010 The Board of Trustees of the University of Illinois.
-
-LANGUAGE=hpvm
-SRCDIR_OBJS=io.ll #compute_gold.o
-HPVM_OBJS=main.hpvm.ll
-APP_CUDALDFLAGS=-lm -lstdc++
-APP_CFLAGS=-ffast-math -O3
-APP_CXXFLAGS=-ffast-math -O3
-APP_OPTFLAGS=-unroll-threshold=300 -loop-unroll -sroa
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/io.cc
deleted file mode 100644
index 04744f404ebaf6e669c2bbe91600519742b57dc9..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/io.cc
+++ /dev/null
@@ -1,84 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 2010 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/* I/O routines for reading and writing matrices in column-major
- * layout
- */
-
-#include <fstream>
-#include <iostream>
-#include <vector>
-
-char *readFile(const char *fileName) {
-  std::fstream f(fileName, std::fstream::in);
-  if (!f.good()) {
-    std::cerr << "Error Reading File!!" << std::endl;
-    return NULL;
-  }
-
-  f.seekg(0, std::ios::end);
-  int length = f.tellg();
-  f.seekg(0, std::ios::beg);
-
-  char *buffer;
-
-  if (length > 0) {
-    buffer = new char[length];
-    f.read(buffer, length);
-    buffer[length - 1] = 0;
-  } else {
-    buffer = new char;
-    buffer[0] = 0;
-  }
-
-  f.close();
-
-  return buffer;
-}
-
-bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col,
-                            std::vector<float> &v) {
-  std::cerr << "Opening file:" << fn << std::endl;
-  std::fstream f(fn, std::fstream::in);
-  if (!f.good()) {
-    return false;
-  }
-
-  // Read # of rows and cols
-  f >> nr_row;
-  f >> nr_col;
-
-  float data;
-  std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl;
-  while (f.good()) {
-    f >> data;
-    v.push_back(data);
-  }
-  v.pop_back(); // remove the duplicated last element
-  return true;
-}
-
-bool writeColMajorMatrixFile(const char *fn, int nr_row, int nr_col,
-                             std::vector<float> &v) {
-  std::cerr << "Opening file:" << fn << " for write." << std::endl;
-  std::fstream f(fn, std::fstream::out);
-  if (!f.good()) {
-    return false;
-  }
-
-  // Read # of rows and cols
-  f << nr_row << " " << nr_col << " ";
-
-  float data;
-  std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl;
-  for (int i = 0; i < v.size(); ++i) {
-    f << v[i] << ' ';
-  }
-  f << "\n";
-  return true;
-}
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/main.cc
deleted file mode 100644
index de0d473ed6fe6724ef81f99b13e02d0de29b103b..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_sh/main.cc
+++ /dev/null
@@ -1,350 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 2010 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/*
- * Main entry of dense matrix-matrix multiplication kernel
- */
-
-#include <hpvm.h>
-#include <iostream>
-#include <malloc.h>
-#include <math.h>
-#include <parboil.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include <vector>
-
-// I/O routines
-extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col,
-                                   std::vector<float> &v);
-extern bool writeColMajorMatrixFile(const char *fn, int, int,
-                                    std::vector<float> &);
-extern char *readFile(const char *);
-
-// Parameters of tile sizes
-#define TILE_N 16
-#define TILE_TB_HEIGHT 8
-#define TILE_M (TILE_N * TILE_TB_HEIGHT)
-
-#define CHECK_ERROR(errorMessage)                                              \
-  if (clStatus != CL_SUCCESS) {                                                \
-    std::cout << errorMessage << " Error!\n";                                  \
-    std::cout << "Line: " << __LINE__ << "\n";                                 \
-    exit(1);                                                                   \
-  }
-
-typedef struct __attribute__((__packed__)) {
-  float *A;
-  size_t bytesA;
-  int lda;
-  float *B;
-  size_t bytesB;
-  int ldb;
-  float *C;
-  size_t bytesC;
-  int ldc;
-  int k;
-  float alpha;
-  float beta;
-  long block_x;
-  long block_y;
-  long grid_x;
-  long grid_y;
-} RootIn;
-
-void packData(RootIn *args, float *A, size_t bytesA, int lda, float *B,
-              size_t bytesB, int ldb, float *C, size_t bytesC, int ldc, int k,
-              float alpha, float beta, long block_x, long block_y, long grid_x,
-              long grid_y) {
-  args->A = A;
-  args->bytesA = bytesA;
-  args->lda = lda;
-  args->B = B;
-  args->bytesB = bytesB;
-  args->ldb = ldb;
-  args->C = C;
-  args->bytesC = bytesC;
-  args->ldc = ldc;
-  args->k = k;
-  args->alpha = alpha;
-  args->beta = beta;
-  args->block_x = block_x;
-  args->block_y = block_y;
-  args->grid_x = grid_x;
-  args->grid_y = grid_y;
-}
-
-void Allocation(long block_x, long block_y) {
-  void *shB = __hpvm__malloc(block_x * block_y * sizeof(float));
-  __hpvm__return(2, shB, block_x * block_y * sizeof(float));
-}
-
-void SgemmLeaf(float *A, size_t bytesA, int lda, float *B, size_t bytesB,
-               int ldb, float *C, size_t bytesC, int ldc, int k, float alpha,
-               float beta, float *shB, size_t bytesshB) {
-  __hpvm__hint(hpvm::DEVICE);
-  //__hpvm__hint(hpvm::SPIR_TARGET);
-  //__hpvm__hint(hpvm::GPU_TARGET);
-
-  __hpvm__attributes(3, A, B, C, 1, C);
-
-  void *thisNode = __hpvm__getNode();
-  void *parentNode = __hpvm__getParentNode(thisNode);
-
-  long lx = __hpvm__getNodeInstanceID_x(thisNode);
-  long ly = __hpvm__getNodeInstanceID_y(thisNode);
-
-  long gx = __hpvm__getNodeInstanceID_x(parentNode);
-  long gy = __hpvm__getNodeInstanceID_y(parentNode);
-
-  long dimx = __hpvm__getNumNodeInstances_x(thisNode);
-
-  float c[TILE_N];
-  for (int i = 0; i < TILE_N; i++)
-    c[i] = 0.0f;
-
-  int mid = ly * dimx + lx;
-  int m = gx * TILE_M + mid;
-  int n = gy * TILE_N + lx;
-
-  for (int i = 0; i < k; i += TILE_TB_HEIGHT) {
-    float a;
-    // shB[ly][lx] = B[n+(i+ly)*ldb];
-    shB[ly * dimx + lx] = B[n + (i + ly) * ldb];
-
-    __hpvm__barrier();
-    for (int j = 0; j < TILE_TB_HEIGHT; j++) {
-      a = A[m + (i + j) * lda];
-      for (int kk = 0; kk < TILE_N; kk++) {
-        // c[kk] += a * shB[j][kk];
-        c[kk] += a * shB[j * dimx + kk];
-      }
-    }
-    __hpvm__barrier();
-  }
-
-  int t = ldc * gy * TILE_N + m;
-  for (int i = 0; i < TILE_N; i++) {
-    C[t + i * ldc] = C[t + i * ldc] * beta + alpha * c[i];
-  }
-}
-
-// Work group node for sgemm - Creates allocation node and leaf (work item) node
-void SgemmTB(float *A, size_t bytesA, int lda, float *B, size_t bytesB, int ldb,
-             float *C, size_t bytesC, int ldc, int k, float alpha, float beta,
-             long block_x, long block_y) {
-  __hpvm__hint(hpvm::CPU_TARGET);
-  __hpvm__attributes(3, A, B, C, 1, C);
-  void *AllocationNode = __hpvm__createNodeND(0, Allocation);
-  void *SgemmLeafNode = __hpvm__createNodeND(2, SgemmLeaf, block_x, block_y);
-
-  // Bind edges
-  __hpvm__bindIn(SgemmLeafNode, 0, 0, 0);   // Bind A
-  __hpvm__bindIn(SgemmLeafNode, 1, 1, 0);   // Bind bytesA
-  __hpvm__bindIn(SgemmLeafNode, 2, 2, 0);   // Bind lda
-  __hpvm__bindIn(SgemmLeafNode, 3, 3, 0);   // Bind B
-  __hpvm__bindIn(SgemmLeafNode, 4, 4, 0);   // Bind bytesB
-  __hpvm__bindIn(SgemmLeafNode, 5, 5, 0);   // Bind ldb
-  __hpvm__bindIn(SgemmLeafNode, 6, 6, 0);   // Bind C
-  __hpvm__bindIn(SgemmLeafNode, 7, 7, 0);   // Bind bytesC
-  __hpvm__bindIn(SgemmLeafNode, 8, 8, 0);   // Bind ldc
-  __hpvm__bindIn(SgemmLeafNode, 9, 9, 0);   // Bind k
-  __hpvm__bindIn(SgemmLeafNode, 10, 10, 0); // Bind alpha
-  __hpvm__bindIn(SgemmLeafNode, 11, 11, 0); // Bind beta
-
-  __hpvm__bindIn(AllocationNode, 12, 0, 0); // Bind block_x
-  __hpvm__bindIn(AllocationNode, 13, 1, 0); // Bind block_y
-
-  // Create Edges between AllocationNode and BFSLeafNodeNode
-  __hpvm__edge(AllocationNode, SgemmLeafNode, 1, 0, 12, 0); // Edge local_B
-  __hpvm__edge(AllocationNode, SgemmLeafNode, 1, 1, 13,
-               0); // Edge bytes_local_B
-}
-
-// Root node for sgemm - Creates work group node
-void SgemmRoot(float *A, size_t bytesA, int lda,                    // 0-2
-               float *B, size_t bytesB, int ldb,                    // 3-5
-               float *C, size_t bytesC, int ldc,                    // 6-8
-               int k, float alpha, float beta,                      // 9-11
-               long block_x, long block_y, long grid_x, long grid_y // 12-15
-) {
-  __hpvm__hint(hpvm::CPU_TARGET);
-  __hpvm__attributes(3, A, B, C, 1, C);
-  void *SgemmTBNode = __hpvm__createNodeND(2, SgemmTB, grid_x, grid_y);
-
-  // Bind edges
-  __hpvm__bindIn(SgemmTBNode, 0, 0, 0);   // Bind A
-  __hpvm__bindIn(SgemmTBNode, 1, 1, 0);   // Bind bytesA
-  __hpvm__bindIn(SgemmTBNode, 2, 2, 0);   // Bind lda
-  __hpvm__bindIn(SgemmTBNode, 3, 3, 0);   // Bind B
-  __hpvm__bindIn(SgemmTBNode, 4, 4, 0);   // Bind bytesB
-  __hpvm__bindIn(SgemmTBNode, 5, 5, 0);   // Bind ldb
-  __hpvm__bindIn(SgemmTBNode, 6, 6, 0);   // Bind C
-  __hpvm__bindIn(SgemmTBNode, 7, 7, 0);   // Bind bytesC
-  __hpvm__bindIn(SgemmTBNode, 8, 8, 0);   // Bind ldc
-  __hpvm__bindIn(SgemmTBNode, 9, 9, 0);   // Bind k
-  __hpvm__bindIn(SgemmTBNode, 10, 10, 0); // Bind alpha
-  __hpvm__bindIn(SgemmTBNode, 11, 11, 0); // Bind beta
-  __hpvm__bindIn(SgemmTBNode, 12, 12, 0); // Bind block_x
-  __hpvm__bindIn(SgemmTBNode, 13, 13, 0); // Bind block_y
-}
-
-void SgemmWrapper(float *A, size_t bytesA, int lda,                    // 0-2
-                  float *B, size_t bytesB, int ldb,                    // 3-5
-                  float *C, size_t bytesC, int ldc,                    // 6-8
-                  int k, float alpha, float beta,                      // 9-11
-                  long block_x, long block_y, long grid_x, long grid_y // 12-15
-) {
-  __hpvm__hint(hpvm::CPU_TARGET);
-  __hpvm__attributes(3, A, B, C, 1, C);
-  void *SgemmRootNode = __hpvm__createNodeND(0, SgemmRoot);
-
-  // Bind edges
-  __hpvm__bindIn(SgemmRootNode, 0, 0, 0);   // Bind A
-  __hpvm__bindIn(SgemmRootNode, 1, 1, 0);   // Bind bytesA
-  __hpvm__bindIn(SgemmRootNode, 2, 2, 0);   // Bind lda
-  __hpvm__bindIn(SgemmRootNode, 3, 3, 0);   // Bind B
-  __hpvm__bindIn(SgemmRootNode, 4, 4, 0);   // Bind bytesB
-  __hpvm__bindIn(SgemmRootNode, 5, 5, 0);   // Bind ldb
-  __hpvm__bindIn(SgemmRootNode, 6, 6, 0);   // Bind C
-  __hpvm__bindIn(SgemmRootNode, 7, 7, 0);   // Bind bytesC
-  __hpvm__bindIn(SgemmRootNode, 8, 8, 0);   // Bind ldc
-  __hpvm__bindIn(SgemmRootNode, 9, 9, 0);   // Bind k
-  __hpvm__bindIn(SgemmRootNode, 10, 10, 0); // Bind alpha
-  __hpvm__bindIn(SgemmRootNode, 11, 11, 0); // Bind beta
-  __hpvm__bindIn(SgemmRootNode, 12, 12, 0); // Bind block_x
-  __hpvm__bindIn(SgemmRootNode, 13, 13, 0); // Bind block_y
-  __hpvm__bindIn(SgemmRootNode, 14, 14, 0); // Bind grid_x
-  __hpvm__bindIn(SgemmRootNode, 15, 15, 0); // Bind grid_y
-}
-
-// Creates root node for sgemm
-__attribute__((noinline)) void basicSgemm(struct pb_TimerSet *timers,
-                                          char transa, char transb, int m,
-                                          int n, int k, float alpha, float *A,
-                                          size_t bytesA, int lda, float *B,
-                                          size_t bytesB, int ldb, float beta,
-                                          float *C, size_t bytesC, int ldc) {
-  if ((transa != 'N') && (transa != 'n')) {
-    std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl;
-    return;
-  }
-
-  if ((transb != 'T') && (transb != 't')) {
-    std::cerr << "unsupported value of 'transb' in regtileSgemm()" << std::endl;
-    return;
-  }
-
-  // In this code we assume the matrix sizes are multiple of tile size
-  if ((m % TILE_M) || (n % TILE_N)) {
-    std::cerr << "unsupported size of matrix. m should be multiple of "
-              << TILE_M << "; n should be multiple of " << TILE_N << std::endl;
-    return;
-  }
-
-  //    unsigned db[2] = {TILE_N,TILE_TB_HEIGHT};
-  //    unsigned dg[2] = {m*TILE_N/TILE_M,n*TILE_TB_HEIGHT/TILE_N};
-
-  long block_x = TILE_N;
-  long block_y = TILE_TB_HEIGHT;
-  long grid_x = m / TILE_M;
-  long grid_y = n / TILE_N;
-
-  // Pack data in struct
-  RootIn *args = (RootIn *)malloc(sizeof(RootIn));
-  packData(args, A, bytesA, lda, B, bytesB, ldb, C, bytesC, ldc, k, alpha, beta,
-           block_x, block_y, grid_x, grid_y);
-
-  pb_SwitchToTimer(timers, hpvm_TimerID_COMPUTATION);
-  void *sgemmDFG = __hpvm__launch(0, SgemmWrapper, (void *)args);
-
-  __hpvm__wait(sgemmDFG);
-  pb_SwitchToTimer(timers, pb_TimerID_COMPUTE);
-}
-
-int main(int argc, char *argv[]) {
-
-  struct pb_Parameters *params;
-  struct pb_TimerSet timers;
-
-  size_t A_sz, B_sz, C_sz;
-  int matArow, matAcol;
-  int matBrow, matBcol;
-  std::vector<float> matA, matBT;
-
-  /* Read command line. Expect 3 inputs: A, B and B^T
-     in column-major layout*/
-  params = pb_ReadParameters(&argc, argv);
-  if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) ||
-      (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) {
-    fprintf(stderr, "Expecting three input filenames\n");
-    exit(-1);
-  }
-
-  /* Read in data */
-  // load A
-  readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA);
-
-  // load B^T
-  readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT);
-
-  pb_InitializeTimerSet(&timers);
-  __hpvm__init();
-
-  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
-  // copy A to device memory
-  A_sz = matArow * matAcol * sizeof(float);
-  B_sz = matBrow * matBcol * sizeof(float);
-
-  // allocate space for C
-  C_sz = matArow * matBcol * sizeof(float);
-
-  // OpenCL memory allocation
-  std::vector<float> matC(matArow * matBcol);
-
-  llvm_hpvm_track_mem(&matA.front(), A_sz);
-  llvm_hpvm_track_mem(&matBT.front(), B_sz);
-  llvm_hpvm_track_mem(&matC.front(), C_sz);
-  // Copy A and B^T into device memory
-  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
-
-  for (size_t i = 0; i < matC.size(); i++)
-    matC[i] = 0.0f;
-
-  // Use standard sgemm interface
-  basicSgemm(&timers, 'N', 'T', matArow, matBcol, matAcol, 1.0f, &matA.front(),
-             A_sz, matArow, &matBT.front(), B_sz, matBcol, 0.0f, &matC.front(),
-             C_sz, matArow);
-
-  pb_SwitchToTimer(&timers, pb_TimerID_COPY);
-  llvm_hpvm_request_mem(&matC.front(), C_sz);
-
-  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
-  llvm_hpvm_untrack_mem(&matA.front());
-  llvm_hpvm_untrack_mem(&matBT.front());
-  llvm_hpvm_untrack_mem(&matC.front());
-  pb_SwitchToTimer(&timers, pb_TimerID_NONE);
-
-  pb_PrintTimerSet(&timers);
-  __hpvm__cleanup();
-
-  if (params->outFile) {
-    /* Write C to file */
-    // pb_SwitchToTimer(&timers, pb_TimerID_IO);
-    writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC);
-  }
-
-  double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL]));
-  std::cout << "GFLOPs = " << 2. * matArow * matBcol * matAcol / GPUtime / 1e9
-            << std::endl;
-  pb_FreeParameters(params);
-
-  return 0;
-}
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/Makefile
deleted file mode 100644
index 2234bf54e1e665f95b38dd0e25c2fe1b5539ce4e..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# (c) 2010 The Board of Trustees of the University of Illinois.
-
-LANGUAGE=hpvm
-SRCDIR_OBJS=io.ll #compute_gold.o
-HPVM_OBJS=main.hpvm.ll
-APP_CUDALDFLAGS=-lm -lstdc++
-APP_CFLAGS=-ffast-math -O3
-APP_CXXFLAGS=-ffast-math -O3
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/io.cc
deleted file mode 100644
index 04744f404ebaf6e669c2bbe91600519742b57dc9..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/io.cc
+++ /dev/null
@@ -1,84 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 2010 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/* I/O routines for reading and writing matrices in column-major
- * layout
- */
-
-#include <fstream>
-#include <iostream>
-#include <vector>
-
-char *readFile(const char *fileName) {
-  std::fstream f(fileName, std::fstream::in);
-  if (!f.good()) {
-    std::cerr << "Error Reading File!!" << std::endl;
-    return NULL;
-  }
-
-  f.seekg(0, std::ios::end);
-  int length = f.tellg();
-  f.seekg(0, std::ios::beg);
-
-  char *buffer;
-
-  if (length > 0) {
-    buffer = new char[length];
-    f.read(buffer, length);
-    buffer[length - 1] = 0;
-  } else {
-    buffer = new char;
-    buffer[0] = 0;
-  }
-
-  f.close();
-
-  return buffer;
-}
-
-bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col,
-                            std::vector<float> &v) {
-  std::cerr << "Opening file:" << fn << std::endl;
-  std::fstream f(fn, std::fstream::in);
-  if (!f.good()) {
-    return false;
-  }
-
-  // Read # of rows and cols
-  f >> nr_row;
-  f >> nr_col;
-
-  float data;
-  std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl;
-  while (f.good()) {
-    f >> data;
-    v.push_back(data);
-  }
-  v.pop_back(); // remove the duplicated last element
-  return true;
-}
-
-bool writeColMajorMatrixFile(const char *fn, int nr_row, int nr_col,
-                             std::vector<float> &v) {
-  std::cerr << "Opening file:" << fn << " for write." << std::endl;
-  std::fstream f(fn, std::fstream::out);
-  if (!f.good()) {
-    return false;
-  }
-
-  // Read # of rows and cols
-  f << nr_row << " " << nr_col << " ";
-
-  float data;
-  std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl;
-  for (int i = 0; i < v.size(); ++i) {
-    f << v[i] << ' ';
-  }
-  f << "\n";
-  return true;
-}
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/main.cc
deleted file mode 100644
index be39d713d55d1cb518083679fb1ea1ce717a4ca9..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc/main.cc
+++ /dev/null
@@ -1,180 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 2010 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/*
- * Main entry of dense matrix-matrix multiplication kernel
- */
-
-#include <hpvm.h>
-#include <iostream>
-#include <malloc.h>
-#include <math.h>
-#include <parboil.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include <vector>
-
-// I/O routines
-extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col,
-                                   std::vector<float> &v);
-extern bool writeColMajorMatrixFile(const char *fn, int, int,
-                                    std::vector<float> &);
-extern char *readFile(const char *);
-
-// Parameters of tile sizes
-#define TILE_SZ 16
-
-#define CHECK_ERROR(errorMessage)                                              \
-  if (clStatus != CL_SUCCESS) {                                                \
-    std::cout << errorMessage << " Error!\n";                                  \
-    std::cout << "Line: " << __LINE__ << "\n";                                 \
-    exit(1);                                                                   \
-  }
-
-void mysgemmNT(float *A, int lda, float *B, int ldb, float *C, int ldc, int k,
-               float alpha, float beta) {
-  __hpvm__attributes(3, A, B, C, 1, C);
-  float c0, c1, c2, c3;
-  c0 = c1 = c2 = c3 = 0.0f;
-  int m = 4 * get_global_id(0);
-  int n = get_global_id(1);
-
-  for (int i = 0; i < k; ++i) {
-    float a0 = A[m + i * lda];
-    float a1 = A[m + 1 + i * lda];
-    float a2 = A[m + 2 + i * lda];
-    float a3 = A[m + 3 + i * lda];
-
-    float b = B[n + i * ldb];
-
-    c0 += a0 * b;
-    c1 += a1 * b;
-    c2 += a2 * b;
-    c3 += a3 * b;
-  }
-  C[m + n * ldc] = C[m + n * ldc] * beta + alpha * c0;
-  C[m + 1 + n * ldc] = C[m + 1 + n * ldc] * beta + alpha * c1;
-  C[m + 2 + n * ldc] = C[m + 2 + n * ldc] * beta + alpha * c2;
-  C[m + 3 + n * ldc] = C[m + 3 + n * ldc] * beta + alpha * c3;
-}
-
-__attribute__((noinline)) void basicSgemm(char transa, char transb, int m,
-                                          int n, int k, float alpha, float *A,
-                                          size_t bytesA, int lda, float *B,
-                                          size_t bytesB, int ldb, float beta,
-                                          float *C, size_t bytesC, int ldc) {
-  if ((transa != 'N') && (transa != 'n')) {
-    std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl;
-    return;
-  }
-
-  if ((transb != 'T') && (transb != 't')) {
-    std::cerr << "unsupported value of 'transb' in regtileSgemm()" << std::endl;
-    return;
-  }
-
-  // In this code we assume the matrix sizes are multiple of tile size
-  if ((m % TILE_SZ) || (n % TILE_SZ)) {
-    std::cerr << "unsupported size of matrix. m should be multiple of "
-              << TILE_SZ << "; n should be multiple of " << TILE_SZ
-              << std::endl;
-  }
-
-  unsigned db[2] = {TILE_SZ / 4, TILE_SZ};
-  unsigned dg[2] = {m / TILE_SZ * db[0], n / TILE_SZ * db[1]};
-
-  unsigned sgemmDFG = __hpvm__node(mysgemmNT, 2, 2, db[0], db[1], dg[0] / db[0],
-                                   dg[1] / db[1], 12, A, bytesA, lda, B, bytesB,
-                                   ldb, C, bytesC, ldc, k, alpha, beta, 0);
-  __hpvm__wait(sgemmDFG);
-}
-
-int main(int argc, char *argv[]) {
-
-  struct pb_Parameters *params;
-  struct pb_TimerSet timers;
-
-  size_t A_sz, B_sz, C_sz;
-  int matArow, matAcol;
-  int matBrow, matBcol;
-  std::vector<float> matA, matBT;
-
-  pb_InitializeTimerSet(&timers);
-  __hpvm__init();
-
-  /* Read command line. Expect 3 inputs: A, B and B^T
-     in column-major layout*/
-  params = pb_ReadParameters(&argc, argv);
-  if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) ||
-      (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) {
-    fprintf(stderr, "Expecting three input filenames\n");
-    exit(-1);
-  }
-
-  /* Read in data */
-  pb_SwitchToTimer(&timers, pb_TimerID_IO);
-
-  // load A
-  readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA);
-
-  // load B^T
-  readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT);
-
-  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
-  // copy A to device memory
-  A_sz = matArow * matAcol * sizeof(float);
-  B_sz = matBrow * matBcol * sizeof(float);
-
-  // allocate space for C
-  C_sz = matArow * matBcol * sizeof(float);
-
-  // OpenCL memory allocation
-  std::vector<float> matC(matArow * matBcol);
-
-  llvm_hpvm_track_mem(&matA.front(), A_sz);
-  llvm_hpvm_track_mem(&matBT.front(), B_sz);
-  llvm_hpvm_track_mem(&matC.front(), C_sz);
-  // Copy A and B^T into device memory
-  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
-
-  for (size_t i = 0; i < matC.size(); i++)
-    matC[i] = 0.0f;
-
-  pb_SwitchToTimer(&timers, pb_TimerID_NONE);
-
-  // Use standard sgemm interface
-  basicSgemm('N', 'T', matArow, matBcol, matAcol, 1.0f, &matA.front(), A_sz,
-             matArow, &matBT.front(), B_sz, matBcol, 0.0f, &matC.front(), C_sz,
-             matArow);
-
-  if (params->outFile) {
-    pb_SwitchToTimer(&timers, pb_TimerID_COPY);
-
-    /* Write C to file */
-    llvm_hpvm_request_mem(&matC.front(), C_sz);
-    pb_SwitchToTimer(&timers, pb_TimerID_IO);
-    writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC);
-  }
-
-  pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK);
-  llvm_hpvm_untrack_mem(&matA.front());
-  llvm_hpvm_untrack_mem(&matBT.front());
-  llvm_hpvm_untrack_mem(&matC.front());
-  pb_SwitchToTimer(&timers, pb_TimerID_NONE);
-
-  double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL]));
-  std::cout << "GFLOPs = " << 2. * matArow * matBcol * matAcol / GPUtime / 1e9
-            << std::endl;
-  pb_PrintTimerSet(&timers);
-  __hpvm__cleanup();
-  pb_FreeParameters(params);
-
-  return 0;
-}
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/Makefile
deleted file mode 100644
index 2234bf54e1e665f95b38dd0e25c2fe1b5539ce4e..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# (c) 2010 The Board of Trustees of the University of Illinois.
-
-LANGUAGE=hpvm
-SRCDIR_OBJS=io.ll #compute_gold.o
-HPVM_OBJS=main.hpvm.ll
-APP_CUDALDFLAGS=-lm -lstdc++
-APP_CFLAGS=-ffast-math -O3
-APP_CXXFLAGS=-ffast-math -O3
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/io.cc
deleted file mode 100644
index 04744f404ebaf6e669c2bbe91600519742b57dc9..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/io.cc
+++ /dev/null
@@ -1,84 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 2010 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/* I/O routines for reading and writing matrices in column-major
- * layout
- */
-
-#include <fstream>
-#include <iostream>
-#include <vector>
-
-char *readFile(const char *fileName) {
-  std::fstream f(fileName, std::fstream::in);
-  if (!f.good()) {
-    std::cerr << "Error Reading File!!" << std::endl;
-    return NULL;
-  }
-
-  f.seekg(0, std::ios::end);
-  int length = f.tellg();
-  f.seekg(0, std::ios::beg);
-
-  char *buffer;
-
-  if (length > 0) {
-    buffer = new char[length];
-    f.read(buffer, length);
-    buffer[length - 1] = 0;
-  } else {
-    buffer = new char;
-    buffer[0] = 0;
-  }
-
-  f.close();
-
-  return buffer;
-}
-
-bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col,
-                            std::vector<float> &v) {
-  std::cerr << "Opening file:" << fn << std::endl;
-  std::fstream f(fn, std::fstream::in);
-  if (!f.good()) {
-    return false;
-  }
-
-  // Read # of rows and cols
-  f >> nr_row;
-  f >> nr_col;
-
-  float data;
-  std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl;
-  while (f.good()) {
-    f >> data;
-    v.push_back(data);
-  }
-  v.pop_back(); // remove the duplicated last element
-  return true;
-}
-
-bool writeColMajorMatrixFile(const char *fn, int nr_row, int nr_col,
-                             std::vector<float> &v) {
-  std::cerr << "Opening file:" << fn << " for write." << std::endl;
-  std::fstream f(fn, std::fstream::out);
-  if (!f.good()) {
-    return false;
-  }
-
-  // Read # of rows and cols
-  f << nr_row << " " << nr_col << " ";
-
-  float data;
-  std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl;
-  for (int i = 0; i < v.size(); ++i) {
-    f << v[i] << ' ';
-  }
-  f << "\n";
-  return true;
-}
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/main.cc
deleted file mode 100644
index be39d713d55d1cb518083679fb1ea1ce717a4ca9..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_tc_vec/main.cc
+++ /dev/null
@@ -1,180 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 2010 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/*
- * Main entry of dense matrix-matrix multiplication kernel
- */
-
-#include <hpvm.h>
-#include <iostream>
-#include <malloc.h>
-#include <math.h>
-#include <parboil.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include <vector>
-
-// I/O routines
-extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col,
-                                   std::vector<float> &v);
-extern bool writeColMajorMatrixFile(const char *fn, int, int,
-                                    std::vector<float> &);
-extern char *readFile(const char *);
-
-// Parameters of tile sizes
-#define TILE_SZ 16
-
-#define CHECK_ERROR(errorMessage)                                              \
-  if (clStatus != CL_SUCCESS) {                                                \
-    std::cout << errorMessage << " Error!\n";                                  \
-    std::cout << "Line: " << __LINE__ << "\n";                                 \
-    exit(1);                                                                   \
-  }
-
-void mysgemmNT(float *A, int lda, float *B, int ldb, float *C, int ldc, int k,
-               float alpha, float beta) {
-  __hpvm__attributes(3, A, B, C, 1, C);
-  float c0, c1, c2, c3;
-  c0 = c1 = c2 = c3 = 0.0f;
-  int m = 4 * get_global_id(0);
-  int n = get_global_id(1);
-
-  for (int i = 0; i < k; ++i) {
-    float a0 = A[m + i * lda];
-    float a1 = A[m + 1 + i * lda];
-    float a2 = A[m + 2 + i * lda];
-    float a3 = A[m + 3 + i * lda];
-
-    float b = B[n + i * ldb];
-
-    c0 += a0 * b;
-    c1 += a1 * b;
-    c2 += a2 * b;
-    c3 += a3 * b;
-  }
-  C[m + n * ldc] = C[m + n * ldc] * beta + alpha * c0;
-  C[m + 1 + n * ldc] = C[m + 1 + n * ldc] * beta + alpha * c1;
-  C[m + 2 + n * ldc] = C[m + 2 + n * ldc] * beta + alpha * c2;
-  C[m + 3 + n * ldc] = C[m + 3 + n * ldc] * beta + alpha * c3;
-}
-
-__attribute__((noinline)) void basicSgemm(char transa, char transb, int m,
-                                          int n, int k, float alpha, float *A,
-                                          size_t bytesA, int lda, float *B,
-                                          size_t bytesB, int ldb, float beta,
-                                          float *C, size_t bytesC, int ldc) {
-  if ((transa != 'N') && (transa != 'n')) {
-    std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl;
-    return;
-  }
-
-  if ((transb != 'T') && (transb != 't')) {
-    std::cerr << "unsupported value of 'transb' in regtileSgemm()" << std::endl;
-    return;
-  }
-
-  // In this code we assume the matrix sizes are multiple of tile size
-  if ((m % TILE_SZ) || (n % TILE_SZ)) {
-    std::cerr << "unsupported size of matrix. m should be multiple of "
-              << TILE_SZ << "; n should be multiple of " << TILE_SZ
-              << std::endl;
-  }
-
-  unsigned db[2] = {TILE_SZ / 4, TILE_SZ};
-  unsigned dg[2] = {m / TILE_SZ * db[0], n / TILE_SZ * db[1]};
-
-  unsigned sgemmDFG = __hpvm__node(mysgemmNT, 2, 2, db[0], db[1], dg[0] / db[0],
-                                   dg[1] / db[1], 12, A, bytesA, lda, B, bytesB,
-                                   ldb, C, bytesC, ldc, k, alpha, beta, 0);
-  __hpvm__wait(sgemmDFG);
-}
-
-int main(int argc, char *argv[]) {
-
-  struct pb_Parameters *params;
-  struct pb_TimerSet timers;
-
-  size_t A_sz, B_sz, C_sz;
-  int matArow, matAcol;
-  int matBrow, matBcol;
-  std::vector<float> matA, matBT;
-
-  pb_InitializeTimerSet(&timers);
-  __hpvm__init();
-
-  /* Read command line. Expect 3 inputs: A, B and B^T
-     in column-major layout*/
-  params = pb_ReadParameters(&argc, argv);
-  if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) ||
-      (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) {
-    fprintf(stderr, "Expecting three input filenames\n");
-    exit(-1);
-  }
-
-  /* Read in data */
-  pb_SwitchToTimer(&timers, pb_TimerID_IO);
-
-  // load A
-  readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA);
-
-  // load B^T
-  readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT);
-
-  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
-  // copy A to device memory
-  A_sz = matArow * matAcol * sizeof(float);
-  B_sz = matBrow * matBcol * sizeof(float);
-
-  // allocate space for C
-  C_sz = matArow * matBcol * sizeof(float);
-
-  // OpenCL memory allocation
-  std::vector<float> matC(matArow * matBcol);
-
-  llvm_hpvm_track_mem(&matA.front(), A_sz);
-  llvm_hpvm_track_mem(&matBT.front(), B_sz);
-  llvm_hpvm_track_mem(&matC.front(), C_sz);
-  // Copy A and B^T into device memory
-  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
-
-  for (size_t i = 0; i < matC.size(); i++)
-    matC[i] = 0.0f;
-
-  pb_SwitchToTimer(&timers, pb_TimerID_NONE);
-
-  // Use standard sgemm interface
-  basicSgemm('N', 'T', matArow, matBcol, matAcol, 1.0f, &matA.front(), A_sz,
-             matArow, &matBT.front(), B_sz, matBcol, 0.0f, &matC.front(), C_sz,
-             matArow);
-
-  if (params->outFile) {
-    pb_SwitchToTimer(&timers, pb_TimerID_COPY);
-
-    /* Write C to file */
-    llvm_hpvm_request_mem(&matC.front(), C_sz);
-    pb_SwitchToTimer(&timers, pb_TimerID_IO);
-    writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC);
-  }
-
-  pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK);
-  llvm_hpvm_untrack_mem(&matA.front());
-  llvm_hpvm_untrack_mem(&matBT.front());
-  llvm_hpvm_untrack_mem(&matC.front());
-  pb_SwitchToTimer(&timers, pb_TimerID_NONE);
-
-  double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL]));
-  std::cout << "GFLOPs = " << 2. * matArow * matBcol * matAcol / GPUtime / 1e9
-            << std::endl;
-  pb_PrintTimerSet(&timers);
-  __hpvm__cleanup();
-  pb_FreeParameters(params);
-
-  return 0;
-}
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/Makefile
deleted file mode 100644
index 2234bf54e1e665f95b38dd0e25c2fe1b5539ce4e..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# (c) 2010 The Board of Trustees of the University of Illinois.
-
-LANGUAGE=hpvm
-SRCDIR_OBJS=io.ll #compute_gold.o
-HPVM_OBJS=main.hpvm.ll
-APP_CUDALDFLAGS=-lm -lstdc++
-APP_CFLAGS=-ffast-math -O3
-APP_CXXFLAGS=-ffast-math -O3
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/io.cc
deleted file mode 100644
index 04744f404ebaf6e669c2bbe91600519742b57dc9..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/io.cc
+++ /dev/null
@@ -1,84 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 2010 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/* I/O routines for reading and writing matrices in column-major
- * layout
- */
-
-#include <fstream>
-#include <iostream>
-#include <vector>
-
-char *readFile(const char *fileName) {
-  std::fstream f(fileName, std::fstream::in);
-  if (!f.good()) {
-    std::cerr << "Error Reading File!!" << std::endl;
-    return NULL;
-  }
-
-  f.seekg(0, std::ios::end);
-  int length = f.tellg();
-  f.seekg(0, std::ios::beg);
-
-  char *buffer;
-
-  if (length > 0) {
-    buffer = new char[length];
-    f.read(buffer, length);
-    buffer[length - 1] = 0;
-  } else {
-    buffer = new char;
-    buffer[0] = 0;
-  }
-
-  f.close();
-
-  return buffer;
-}
-
-bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col,
-                            std::vector<float> &v) {
-  std::cerr << "Opening file:" << fn << std::endl;
-  std::fstream f(fn, std::fstream::in);
-  if (!f.good()) {
-    return false;
-  }
-
-  // Read # of rows and cols
-  f >> nr_row;
-  f >> nr_col;
-
-  float data;
-  std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl;
-  while (f.good()) {
-    f >> data;
-    v.push_back(data);
-  }
-  v.pop_back(); // remove the duplicated last element
-  return true;
-}
-
-bool writeColMajorMatrixFile(const char *fn, int nr_row, int nr_col,
-                             std::vector<float> &v) {
-  std::cerr << "Opening file:" << fn << " for write." << std::endl;
-  std::fstream f(fn, std::fstream::out);
-  if (!f.good()) {
-    return false;
-  }
-
-  // Read # of rows and cols
-  f << nr_row << " " << nr_col << " ";
-
-  float data;
-  std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl;
-  for (int i = 0; i < v.size(); ++i) {
-    f << v[i] << ' ';
-  }
-  f << "\n";
-  return true;
-}
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/kernel.cl b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/kernel.cl
deleted file mode 100644
index 7530a400759e2d6db6ffd466c3f6aaf9dfab2117..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/kernel.cl
+++ /dev/null
@@ -1,53 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 2010 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/* 
- * Kernel of dense matrix-matrix multiplication kernel.
- */
-
-__kernel void mysgemmNT( __global float *A, size_t bytesA, int lda, __global float *B, size_t bytesB, int ldb, __global float* C, size_t bytesC, int ldc, int k, float alpha, float beta )
-{
-/*
-    // Partial results 
-    float c[8];
-    for (int i=0; i < 8; i++)
-	c[i] = 0.0f;
-    float a[8];
-    float b[8];
-
-    int m = get_global_id(0) * 8;
-    int n = get_global_id(1);
-
-    for (int i = 0; i < k; ++i) {
-        for (int id = 0; id < 8; id++) {
-	    a[id] = A[m + id + i * lda];
-	    b[id] = B[n + i * ldb];
-	    c[id] += a[id] * b[id];
-        }
-    }
-
-    for (int id = 0; id < 8; id++)
-        C[m+id+n*ldc] = C[m+id+n*ldc] * beta + alpha * c[id];
-*/
-
-    // Partial results 
-    float8 cp = (float8)(0.0f);
-
-    int m = get_global_id(0) * 8;
-    int n = get_global_id(1);
-
-    for (int i = 0; i < k; ++i) {
-        float8 a = vload8(0, A + (m + i * lda));
-        float8 b = (float8)(B[n + i * ldb]);
-        cp += a * b;
-    }
-
-    float8 c = vload8(0, C + (m+n*ldc));
-    c = c * beta + alpha * cp;
-    vstore8(c, 0, C + (m+n*ldc));
-}
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/main.cc
deleted file mode 100644
index 286297d6fefe0b6f72bdc9e8a9079a131a7b16bf..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec/main.cc
+++ /dev/null
@@ -1,189 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 2010 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/*
- * Main entry of dense matrix-matrix multiplication kernel
- */
-
-#include <hpvm.h>
-#include <iostream>
-#include <malloc.h>
-#include <math.h>
-#include <parboil.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include <vector>
-
-// I/O routines
-extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col,
-                                   std::vector<float> &v);
-extern bool writeColMajorMatrixFile(const char *fn, int, int,
-                                    std::vector<float> &);
-extern char *readFile(const char *);
-
-// Parameters of tile sizes
-#define TILE_SZ 16
-#define VEC_SZ 8
-
-#define CHECK_ERROR(errorMessage)                                              \
-  if (clStatus != CL_SUCCESS) {                                                \
-    std::cout << errorMessage << " Error!\n";                                  \
-    std::cout << "Line: " << __LINE__ << "\n";                                 \
-    exit(1);                                                                   \
-  }
-
-void mysgemmNT(float *A, int lda, float *B, int ldb, float *C, int ldc, int k,
-               float alpha, float beta) {
-  __hpvm__hint(hpvm::GPU_TARGET);
-  __hpvm__attributes(3, A, B, C, 1, C);
-
-  float c = 0.0f;
-  int m = get_global_id(0);
-  int n = get_global_id(1);
-
-  for (int i = 0; i < k; ++i) {
-    float a = A[m + i * lda];
-    float b = B[n + i * ldb];
-    c += a * b;
-  }
-  C[m + n * ldc] = C[m + n * ldc] * beta + alpha * c;
-  /*
-      Will be substituted by this kernel at the llvm level
-      // Partial results
-      float8 cp = (float8)(0.0f);
-
-      int m = get_global_id(0) * 8;
-      int n = get_global_id(1);
-
-      for (int i = 0; i < k; ++i) {
-          float8 a = vload8(0, A + (m + i * lda));
-          float8 b = (float8)(B[n + i * ldb]);
-          cp += a * b;
-      }
-
-      float8 c = vload8(0, C + (m+n*ldc));
-      c = c * beta + alpha * cp;
-      vstore8(c, 0, C + (m+n*ldc));
-  */
-}
-
-__attribute__((noinline)) void basicSgemm(char transa, char transb, int m,
-                                          int n, int k, float alpha, float *A,
-                                          size_t bytesA, int lda, float *B,
-                                          size_t bytesB, int ldb, float beta,
-                                          float *C, size_t bytesC, int ldc) {
-  if ((transa != 'N') && (transa != 'n')) {
-    std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl;
-    return;
-  }
-
-  if ((transb != 'T') && (transb != 't')) {
-    std::cerr << "unsupported value of 'transb' in regtileSgemm()" << std::endl;
-    return;
-  }
-
-  // In this code we assume the matrix sizes are multiple of tile size
-  if ((m % TILE_SZ) || (n % TILE_SZ)) {
-    std::cerr << "unsupported size of matrix. m should be multiple of "
-              << TILE_SZ << "; n should be multiple of " << TILE_SZ
-              << std::endl;
-  }
-
-  unsigned db[2] = {TILE_SZ / VEC_SZ, TILE_SZ};
-  unsigned dg[2] = {m / TILE_SZ * db[0], n / TILE_SZ * db[1]};
-
-  unsigned sgemmDFG = __hpvm__node(mysgemmNT, 2, 2, db[0], db[1], dg[0] / db[0],
-                                   dg[1] / db[1], 12, A, bytesA, lda, B, bytesB,
-                                   ldb, C, bytesC, ldc, k, alpha, beta, 0);
-  __hpvm__wait(sgemmDFG);
-}
-
-int main(int argc, char *argv[]) {
-
-  struct pb_Parameters *params;
-  struct pb_TimerSet timers;
-
-  size_t A_sz, B_sz, C_sz;
-  int matArow, matAcol;
-  int matBrow, matBcol;
-  std::vector<float> matA, matBT;
-
-  /* Read command line. Expect 3 inputs: A, B and B^T
-     in column-major layout*/
-  params = pb_ReadParameters(&argc, argv);
-  if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) ||
-      (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) {
-    fprintf(stderr, "Expecting three input filenames\n");
-    exit(-1);
-  }
-
-  /* Read in data */
-  // load A
-  readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA);
-
-  // load B^T
-  readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT);
-
-  pb_InitializeTimerSet(&timers);
-  __hpvm__init();
-
-  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
-  // copy A to device memory
-  A_sz = matArow * matAcol * sizeof(float);
-  B_sz = matBrow * matBcol * sizeof(float);
-
-  // allocate space for C
-  C_sz = matArow * matBcol * sizeof(float);
-
-  // OpenCL memory allocation
-  std::vector<float> matC(matArow * matBcol);
-
-  llvm_hpvm_track_mem(&matA.front(), A_sz);
-  llvm_hpvm_track_mem(&matBT.front(), B_sz);
-  llvm_hpvm_track_mem(&matC.front(), C_sz);
-  // Copy A and B^T into device memory
-  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
-
-  for (size_t i = 0; i < matC.size(); i++)
-    matC[i] = 0.0f;
-
-  pb_SwitchToTimer(&timers, pb_TimerID_NONE);
-
-  // Use standard sgemm interface
-  basicSgemm('N', 'T', matArow, matBcol, matAcol, 1.0f, &matA.front(), A_sz,
-             matArow, &matBT.front(), B_sz, matBcol, 0.0f, &matC.front(), C_sz,
-             matArow);
-
-  pb_SwitchToTimer(&timers, pb_TimerID_COPY);
-  llvm_hpvm_request_mem(&matC.front(), C_sz);
-
-  pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK);
-  llvm_hpvm_untrack_mem(&matA.front());
-  llvm_hpvm_untrack_mem(&matBT.front());
-  llvm_hpvm_untrack_mem(&matC.front());
-  pb_SwitchToTimer(&timers, pb_TimerID_NONE);
-
-  pb_PrintTimerSet(&timers);
-  __hpvm__cleanup();
-
-  if (params->outFile) {
-
-    /* Write C to file */
-    // pb_SwitchToTimer(&timers, pb_TimerID_IO);
-    writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC);
-  }
-
-  double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL]));
-  std::cout << "GFLOPs = " << 2. * matArow * matBcol * matAcol / GPUtime / 1e9
-            << std::endl;
-  pb_FreeParameters(params);
-
-  return 0;
-}
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/Makefile b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/Makefile
deleted file mode 100644
index 2234bf54e1e665f95b38dd0e25c2fe1b5539ce4e..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# (c) 2010 The Board of Trustees of the University of Illinois.
-
-LANGUAGE=hpvm
-SRCDIR_OBJS=io.ll #compute_gold.o
-HPVM_OBJS=main.hpvm.ll
-APP_CUDALDFLAGS=-lm -lstdc++
-APP_CFLAGS=-ffast-math -O3
-APP_CXXFLAGS=-ffast-math -O3
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/io.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/io.cc
deleted file mode 100644
index 04744f404ebaf6e669c2bbe91600519742b57dc9..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/io.cc
+++ /dev/null
@@ -1,84 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 2010 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/* I/O routines for reading and writing matrices in column-major
- * layout
- */
-
-#include <fstream>
-#include <iostream>
-#include <vector>
-
-char *readFile(const char *fileName) {
-  std::fstream f(fileName, std::fstream::in);
-  if (!f.good()) {
-    std::cerr << "Error Reading File!!" << std::endl;
-    return NULL;
-  }
-
-  f.seekg(0, std::ios::end);
-  int length = f.tellg();
-  f.seekg(0, std::ios::beg);
-
-  char *buffer;
-
-  if (length > 0) {
-    buffer = new char[length];
-    f.read(buffer, length);
-    buffer[length - 1] = 0;
-  } else {
-    buffer = new char;
-    buffer[0] = 0;
-  }
-
-  f.close();
-
-  return buffer;
-}
-
-bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col,
-                            std::vector<float> &v) {
-  std::cerr << "Opening file:" << fn << std::endl;
-  std::fstream f(fn, std::fstream::in);
-  if (!f.good()) {
-    return false;
-  }
-
-  // Read # of rows and cols
-  f >> nr_row;
-  f >> nr_col;
-
-  float data;
-  std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl;
-  while (f.good()) {
-    f >> data;
-    v.push_back(data);
-  }
-  v.pop_back(); // remove the duplicated last element
-  return true;
-}
-
-bool writeColMajorMatrixFile(const char *fn, int nr_row, int nr_col,
-                             std::vector<float> &v) {
-  std::cerr << "Opening file:" << fn << " for write." << std::endl;
-  std::fstream f(fn, std::fstream::out);
-  if (!f.good()) {
-    return false;
-  }
-
-  // Read # of rows and cols
-  f << nr_row << " " << nr_col << " ";
-
-  float data;
-  std::cerr << "Matrix dimension: " << nr_row << "x" << nr_col << std::endl;
-  for (int i = 0; i < v.size(); ++i) {
-    f << v[i] << ' ';
-  }
-  f << "\n";
-  return true;
-}
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/kernel.cl b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/kernel.cl
deleted file mode 100644
index cc6e708148f40c80186004d3febd66988c67ae37..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/kernel.cl
+++ /dev/null
@@ -1,86 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 2010 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/* 
- * Kernel of dense matrix-matrix multiplication kernel.
- */
-
-// Parameters of tile sizes
-#define TILE_N 8
-#define TILE_TB_HEIGHT 8
-#define TILE_M (TILE_N*TILE_TB_HEIGHT)
-
-__kernel void mysgemmNT( __global const float *A, int lda, __global const float *B, int ldb, __global float* C, int ldc, int k, float alpha, float beta )
-{
-
-    float c[TILE_N];
-    for (int i=0; i < TILE_N; i++)
-	c[i] = 0.0f;
-   
-    int mid = get_local_id(1)*get_local_size(0)+get_local_id(0);
-    int m = get_group_id(0) * TILE_M + mid;
-
-    int b_base = 0;
-
-    for (int i = 0; i < k; i+=TILE_TB_HEIGHT) {
-	float a; 
-        b_base = get_group_id(1) * TILE_N + i * ldb;
-
-	for (int j = 0; j < TILE_TB_HEIGHT; j++) {
-	    a = A[m + (i+j)*lda];
-	    for (int kk = 0; kk < TILE_N; kk++)
-		c[kk] += a * B[b_base + j * ldb + kk];
-
-	}
-    }
-    int t = ldc * get_group_id(1) * TILE_N + m;
-    for (int i = 0; i < TILE_N; i++) {
-	C[t+i*ldc] = C[t+i*ldc] * beta + alpha * c[i];
-    }
-/*
-    Will be substituted by this kernel at the llvm level
-
-    // Partial results 
-    floatn cp = (floatn)(0.0f);
-
-    int mid = get_local_id(1)*get_local_size(0)+get_local_id(0);
-    int m = get_group_id(0) * TILE_M + mid;
-
-    int b_base = 0;
-
-    for (int i = 0; i < k; i+=TILE_TB_HEIGHT) {
-	float a;
-        b_base = get_group_id(1) * TILE_N + i * ldb;
-
-	for (int j = 0; j < TILE_TB_HEIGHT; j++) {
-	    a = A[m + (i+j)*lda];
-	    cp += a * vloadn(0, B + b_base + j * ldb);
-	}
-    }
-
-    cp = alpha * cp;
-    float c[TILE_N];
-    c[0] = cp.s0;
-    c[1] = cp.s1;
-    c[2] = cp.s2;
-    c[3] = cp.s3;
-    c[4] = cp.s4;
-    c[5] = cp.s5;
-    c[6] = cp.s6;
-    c[7] = cp.s7;
-
-    int t = ldc * get_group_id(1) * TILE_N + m;
-    for (int i = 0; i < TILE_N; i++) {
-	C[t+i*ldc] = C[t+i*ldc] * beta + c[i];
-    }
-
-*/
-
-*/
-
-}
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/main.cc
deleted file mode 100644
index 8fbc45e08a9e2fd1e3af6cc03360086b354665d7..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/sgemm/src/hpvm_vec_opt/main.cc
+++ /dev/null
@@ -1,227 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 2010 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/*
- * Main entry of dense matrix-matrix multiplication kernel
- */
-
-#include <hpvm.h>
-#include <iostream>
-#include <malloc.h>
-#include <math.h>
-#include <parboil.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include <vector>
-
-// I/O routines
-extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col,
-                                   std::vector<float> &v);
-extern bool writeColMajorMatrixFile(const char *fn, int, int,
-                                    std::vector<float> &);
-extern char *readFile(const char *);
-
-// Parameters of tile sizes
-#define TILE_N 8
-#define TILE_TB_HEIGHT 8
-#define TILE_M (TILE_N * TILE_TB_HEIGHT)
-
-#define CHECK_ERROR(errorMessage)                                              \
-  if (clStatus != CL_SUCCESS) {                                                \
-    std::cout << errorMessage << " Error!\n";                                  \
-    std::cout << "Line: " << __LINE__ << "\n";                                 \
-    exit(1);                                                                   \
-  }
-
-void mysgemmNT(float *A, int lda, float *B, int ldb, float *C, int ldc, int k,
-               float alpha, float beta) {
-  __hpvm__hint(hpvm::SPIR_TARGET);
-  __hpvm__attributes(3, A, B, C, 1, C);
-
-  float c[TILE_N];
-  for (int i = 0; i < TILE_N; i++)
-    c[i] = 0.0f;
-
-  int mid = get_local_id(1) * get_local_size(0) + get_local_id(0);
-  int m = get_group_id(0) * TILE_M + mid;
-
-  int b_base = 0;
-
-  for (int i = 0; i < k; i += TILE_TB_HEIGHT) {
-    float a;
-    b_base = get_group_id(1) * TILE_N + i * ldb;
-
-    for (int j = 0; j < TILE_TB_HEIGHT; j++) {
-      a = A[m + (i + j) * lda];
-      for (int kk = 0; kk < TILE_N; kk++)
-        c[kk] += a * B[b_base + j * ldb + kk];
-    }
-  }
-  int t = ldc * get_group_id(1) * TILE_N + m;
-  for (int i = 0; i < TILE_N; i++) {
-    C[t + i * ldc] = C[t + i * ldc] * beta + alpha * c[i];
-  }
-  /*
-      Will be substituted by this kernel at the llvm level
-
-      // Partial results
-      floatn cp = (floatn)(0.0f);
-
-      int mid = get_local_id(1)*get_local_size(0)+get_local_id(0);
-      int m = get_group_id(0) * TILE_M + mid;
-
-      int b_base = 0;
-
-      for (int i = 0; i < k; i+=TILE_TB_HEIGHT) {
-          float a;
-          b_base = get_group_id(1) * TILE_N + i * ldb;
-
-          for (int j = 0; j < TILE_TB_HEIGHT; j++) {
-              a = A[m + (i+j)*lda];
-              cp += a * vloadn(0, B + b_base + j * ldb);
-          }
-      }
-
-      cp = alpha * cp;
-      float c[TILE_N];
-      c[0] = cp.s0;
-      c[1] = cp.s1;
-      c[2] = cp.s2;
-      c[3] = cp.s3;
-      c[4] = cp.s4;
-      c[5] = cp.s5;
-      c[6] = cp.s6;
-      c[7] = cp.s7;
-
-      int t = ldc * get_group_id(1) * TILE_N + m;
-      for (int i = 0; i < TILE_N; i++) {
-          C[t+i*ldc] = C[t+i*ldc] * beta + c[i];
-      }
-
-  */
-}
-
-__attribute__((noinline)) void basicSgemm(char transa, char transb, int m,
-                                          int n, int k, float alpha, float *A,
-                                          size_t bytesA, int lda, float *B,
-                                          size_t bytesB, int ldb, float beta,
-                                          float *C, size_t bytesC, int ldc) {
-  if ((transa != 'N') && (transa != 'n')) {
-    std::cerr << "unsupported value of 'transa' in regtileSgemm()" << std::endl;
-    return;
-  }
-
-  if ((transb != 'T') && (transb != 't')) {
-    std::cerr << "unsupported value of 'transb' in regtileSgemm()" << std::endl;
-    return;
-  }
-
-  // In this code we assume the matrix sizes are multiple of tile size
-  if ((m % TILE_M) || (n % TILE_N)) {
-    std::cerr << "unsupported size of matrix. m should be multiple of "
-              << TILE_M << "; n should be multiple of " << TILE_N << std::endl;
-    return;
-  }
-
-  //    unsigned db[2] = {TILE_SZ/VEC_SZ,TILE_SZ};
-  //    unsigned dg[2] = {m/TILE_SZ*db[0],n/TILE_SZ*db[1]};
-  unsigned db[2] = {TILE_N, TILE_TB_HEIGHT};
-  unsigned dg[2] = {m * TILE_N / TILE_M, n * TILE_TB_HEIGHT / TILE_N};
-
-  void *sgemmDFG = __hpvm__node(mysgemmNT, 2, 2, db[0], db[1], dg[0] / db[0],
-                                dg[1] / db[1], 12, A, bytesA, lda, B, bytesB,
-                                ldb, C, bytesC, ldc, k, alpha, beta, 0);
-  __hpvm__wait(sgemmDFG);
-}
-
-int main(int argc, char *argv[]) {
-
-  struct pb_Parameters *params;
-  struct pb_TimerSet timers;
-
-  size_t A_sz, B_sz, C_sz;
-  int matArow, matAcol;
-  int matBrow, matBcol;
-  std::vector<float> matA, matBT;
-
-  /* Read command line. Expect 3 inputs: A, B and B^T
-     in column-major layout*/
-  params = pb_ReadParameters(&argc, argv);
-  if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) ||
-      (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) {
-    fprintf(stderr, "Expecting three input filenames\n");
-    exit(-1);
-  }
-
-  /* Read in data */
-  // load A
-  readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA);
-
-  // load B^T
-  readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT);
-
-  pb_InitializeTimerSet(&timers);
-  __hpvm__init();
-
-  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
-  // copy A to device memory
-  A_sz = matArow * matAcol * sizeof(float);
-  B_sz = matBrow * matBcol * sizeof(float);
-
-  // allocate space for C
-  C_sz = matArow * matBcol * sizeof(float);
-
-  // OpenCL memory allocation
-  std::vector<float> matC(matArow * matBcol);
-
-  pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_TRACK);
-  llvm_hpvm_track_mem(&matA.front(), A_sz);
-  llvm_hpvm_track_mem(&matBT.front(), B_sz);
-  llvm_hpvm_track_mem(&matC.front(), C_sz);
-
-  // Copy A and B^T into device memory
-  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
-
-  for (size_t i = 0; i < matC.size(); i++)
-    matC[i] = 0.0f;
-
-  pb_SwitchToTimer(&timers, pb_TimerID_NONE);
-
-  // Use standard sgemm interface
-  basicSgemm('N', 'T', matArow, matBcol, matAcol, 1.0f, &matA.front(), A_sz,
-             matArow, &matBT.front(), B_sz, matBcol, 0.0f, &matC.front(), C_sz,
-             matArow);
-
-  pb_SwitchToTimer(&timers, pb_TimerID_COPY);
-  llvm_hpvm_request_mem(&matC.front(), C_sz);
-
-  pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK);
-  llvm_hpvm_untrack_mem(&matA.front());
-  llvm_hpvm_untrack_mem(&matBT.front());
-  llvm_hpvm_untrack_mem(&matC.front());
-  pb_SwitchToTimer(&timers, pb_TimerID_NONE);
-
-  pb_PrintTimerSet(&timers);
-  __hpvm__cleanup();
-
-  if (params->outFile) {
-
-    /* Write C to file */
-    // pb_SwitchToTimer(&timers, pb_TimerID_IO);
-    writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC);
-  }
-
-  double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL]));
-  std::cout << "GFLOPs = " << 2. * matArow * matBcol * matAcol / GPUtime / 1e9
-            << std::endl;
-  pb_FreeParameters(params);
-
-  return 0;
-}
diff --git a/hpvm/test/parboil/benchmarks/stencil/src/hpvm_vec/common.h b/hpvm/test/parboil/benchmarks/stencil/src/hpvm_vec/common.h
deleted file mode 100644
index 12a6d131c29067073fa79f09c4e6f91b8662969c..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/stencil/src/hpvm_vec/common.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 2010 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-#ifndef _COMMON_H_
-#define _COMMON_H_
-//#define Index3D(_nx,_ny,_i,_j,_k) ((_i)+_nx*((_j)+_ny*(_k)))
-// +3 for padding
-#define Index3D(_nx, _ny, _i, _j, _k) ((_i) + _nx * ((_j) + _ny * (_k)) + 3)
-#define TCF 4
-#endif
diff --git a/hpvm/test/parboil/benchmarks/stencil/src/hpvm_vec/stencil.c b/hpvm/test/parboil/benchmarks/stencil/src/hpvm_vec/stencil.c
deleted file mode 100644
index 35c5ed960c2031b0b84124bbdd1aeb95042625ee..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/benchmarks/stencil/src/hpvm_vec/stencil.c
+++ /dev/null
@@ -1,176 +0,0 @@
-
-/***************************************************************************
- *cr
- *cr            (C) Copyright 2010 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-#include "common.h"
-#include "file.h"
-#include <hpvm.h>
-#include <parboil.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-static int read_data(float *A0, int nx, int ny, int nz, FILE *fp) {
-  int s = 0;
-  int i, j, k;
-  for (i = 0; i < nz; i++) {
-    for (j = 0; j < ny; j++) {
-      for (k = 0; k < nx; k++) {
-        fread(A0 + s, sizeof(float), 1, fp);
-        s++;
-      }
-    }
-  }
-  return 0;
-}
-
-void naive_kernel(float c0, float c1, float *A0, float *Anext, int nx, int ny,
-                  int nz) {
-  __hpvm__attributes(2, A0, Anext, 1, Anext);
-  int i = get_global_id(0) + 1;
-  int j = get_global_id(1) + 1;
-  int k = get_global_id(2) + 1;
-
-  if (i < nx - 1) {
-    Anext[Index3D(nx, ny, i, j, k)] = c1 * (A0[Index3D(nx, ny, i, j, k + 1)] +
-                                            A0[Index3D(nx, ny, i, j, k - 1)] +
-                                            A0[Index3D(nx, ny, i, j + 1, k)] +
-                                            A0[Index3D(nx, ny, i, j - 1, k)] +
-                                            A0[Index3D(nx, ny, i + 1, j, k)] +
-                                            A0[Index3D(nx, ny, i - 1, j, k)]) -
-                                      A0[Index3D(nx, ny, i, j, k)] * c0;
-  }
-}
-
-int main(int argc, char **argv) {
-  struct pb_TimerSet timers;
-  struct pb_Parameters *parameters;
-
-  printf("OpenCL accelerated 7 points stencil codes****\n");
-  printf("Author: Li-Wen Chang <lchang20@illinois.edu>\n");
-  parameters = pb_ReadParameters(&argc, argv);
-
-  /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/
-
-  // declaration
-  int nx, ny, nz;
-  size_t size;
-  int iteration;
-  float c0 = 1.0 / 6.0;
-  float c1 = 1.0 / 6.0 / 6.0;
-
-  if (argc < 5) {
-    printf("Usage: probe nx ny nz t\n"
-           "nx: the grid size x\n"
-           "ny: the grid size y\n"
-           "nz: the grid size z\n"
-           "t: the iteration time\n");
-    return -1;
-  }
-
-  nx = atoi(argv[1]);
-  if (nx < 1)
-    return -1;
-  ny = atoi(argv[2]);
-  if (ny < 1)
-    return -1;
-  nz = atoi(argv[3]);
-  if (nz < 1)
-    return -1;
-  iteration = atoi(argv[4]);
-  if (iteration < 1)
-    return -1;
-
-  // host data
-  float *h_A0;
-  float *h_Anext;
-
-  // load data from files
-
-  size = nx * ny * nz;
-
-  // Padding in the beginning to get aligned loads and stores
-  size = size + 3;
-
-  h_A0 = (float *)malloc(sizeof(float) * size);
-  h_Anext = (float *)malloc(sizeof(float) * size);
-
-  /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/
-  FILE *fp = fopen(parameters->inpFiles[0], "rb");
-  read_data(h_A0 + 3, nx, ny, nz, fp);
-  fclose(fp);
-
-  pb_InitializeTimerSet(&timers);
-  __hpvm__init();
-
-  pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_TRACK);
-  llvm_hpvm_track_mem(h_A0, sizeof(float) * size);
-  llvm_hpvm_track_mem(h_Anext, sizeof(float) * size);
-
-  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
-
-  memcpy(h_Anext, h_A0, sizeof(float) * size);
-
-  // only use 1D thread block
-  int tx = 256 / TCF;
-  int block[3] = {tx, 1, 1};
-  int grid[3] = {(nx - 2 + TCF * tx - 1) / (TCF * tx) * tx, ny - 2, nz - 2};
-  // size_t grid[3] = {nx-2,ny-2,nz-2};
-  size_t offset[3] = {1, 1, 1};
-
-  printf("grid(%d, %d, %d), block(%d, %d, %d)\n", grid[0], grid[1], grid[2],
-         block[0], block[1], block[2]);
-  // main execution
-
-  int t;
-  size_t bytes = size * sizeof(float);
-  printf("A[126,1,1] = %f\n", h_A0[Index3D(nx, ny, 126, 1, 1)]);
-  printf("A[125,1,1] = %f\n", h_A0[Index3D(nx, ny, 125, 1, 1)]);
-  for (t = 0; t < iteration; t++) {
-    pb_SwitchToTimer(&timers, pb_TimerID_NONE);
-    unsigned stencilDFG = __hpvm__node(
-        naive_kernel, 2, 3, block[0], block[1], block[2], grid[0] / block[0],
-        grid[1] / block[1], grid[2] / block[2], 9, (float)c0, (float)c1, h_A0,
-        bytes, h_Anext, bytes, nx, ny, nz, 0);
-    __hpvm__wait(stencilDFG);
-    // printf("iteration %d\n",t);
-    pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
-    float *h_temp = h_A0;
-    h_A0 = h_Anext;
-    h_Anext = h_temp;
-  }
-
-  float *h_temp = h_A0;
-  h_A0 = h_Anext;
-  h_Anext = h_temp;
-  pb_SwitchToTimer(&timers, pb_TimerID_COPY);
-  llvm_hpvm_request_mem(h_Anext, bytes);
-  printf("A[126,1,1] = %f\n", h_Anext[Index3D(nx, ny, 126, 1, 1)]);
-  printf("A[125,1,1] = %f\n", h_Anext[Index3D(nx, ny, 125, 1, 1)]);
-
-  pb_SwitchToTimer(&timers, hpvm_TimerID_MEM_UNTRACK);
-
-  llvm_hpvm_untrack_mem(h_A0);
-  llvm_hpvm_untrack_mem(h_Anext);
-
-  pb_SwitchToTimer(&timers, pb_TimerID_NONE);
-  pb_PrintTimerSet(&timers);
-
-  __hpvm__cleanup();
-
-  if (parameters->outFile) {
-    /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/
-    outputData(parameters->outFile, h_Anext + 3, nx, ny, nz);
-  }
-  /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/
-  free(h_A0);
-  free(h_Anext);
-  pb_FreeParameters(parameters);
-
-  return 0;
-}