diff --git a/README.md b/README.md
index 4970eccbacb5ecd9fb30f2dcde065d43b672d6db..3f054d92b3965ec7cecec1af391c179608e1e719 100644
--- a/README.md
+++ b/README.md
@@ -7,24 +7,42 @@ This repository contains miscellaneous supporting materals for HPVM.
 [PPoPP'18 paper](http://rsim.cs.illinois.edu/Pubs/17-PPOPP-HPVM.pdf)
 
 ## Dependencies
-You would need to download and install the following components for using AVX and NVIDIA GPUs to speed up your programs
+You would need to download and install the following components for using NVIDIA GPUs to speed up your programs
 
 * Intel OpenCL SDK for Linux from [software.intel.com/sdk/opencl](software.intel.com/sdk/opencl). Follow the installation instructions (no special requirements).
 * CUDA
 
-## Build
+## Getting source code and building HPVM
 
-Switch to hpvm-reorg-9
+Checkout HPVM:
 ```shell
-git checkout hpvm-reorg-9
+git clone https://gitlab.engr.illinois.edu/llvm/hpvm.git
+git checkout hpvm-reorg-9  # this step may not be needed once code is mirrored on GitHub
 ```
 
-Build hpvm
+The HPVM installer script can be used to download, configure, and build HPVM along with LLVM and other subprojects including Clang.
 ```shell
 bash install.sh
 ```
+Specifically, the HPVM installer downloads the LLVM, Clang, compiler-rt, libcxxabi and lld, copies HPVM source into 
+llvm/tools and builds the entire tree. LLVM C-Backend is also built as a part of HPVM and is currently used to perform 
+code generation in OpenCL for GPUs.
 
-Build hpvm runtime 
+Alternatively, CMake can be run manually.
+```shell
+cd hpvm/build
+cmake ../llvm [options]
+```
+Some common options that can be used with CMake are:
+
+* -DCMAKE_INSTALL_PREFIX=directory --- Specify the full pathname of the directory where you want the HPVM tools and libraries to be installed.
+
+* -DCMAKE_BUILD_TYPE=type --- Valid options for type are Debug, Release, RelWithDebInfo, and MinSizeRel. Default is Debug.
+
+* -DLLVM_ENABLE_ASSERTIONS=On --- Compile with assertion checks enabled (default is Yes for Debug builds, No for all other build types).
+
+## Building hpvm runtime
+HPVM also includes a runtime library which comprises low-level, target-specific wrappers required by HPVM's code generation.
 ```shell
 cd projects/visc-rt
 make
@@ -40,14 +58,3 @@ export LLVM_SRC_ROOT=<full path to hpvm>/llvm
 Benchmark suites have been migrated to the LLVM 4.0 build. They are located
 in [VISC](/llvm/test/VISC/parboil/benchmarks).
 
-### Running an example (sgemm in parboil)
-```shell
-cd llvm/test/VISC/parboil/benchmarks/sgemm
-make 
-make run
-```
-
-### Other Old Components
-
-Search this repository for "visc", case-insensitive.
-
diff --git a/hpvm/test/parboil/benchmarks/bfs/Makefile b/hpvm/test/parboil/benchmarks/bfs/Makefile
index 8261dab47b1466863105d5f1aafd18a538661540..cc6db678298c4c66312248cc4f7a2df0bd134d3f 100644
--- a/hpvm/test/parboil/benchmarks/bfs/Makefile
+++ b/hpvm/test/parboil/benchmarks/bfs/Makefile
@@ -1,4 +1,4 @@
-PARBOIL_ROOT = $(LLVM_SRC_ROOT)/test/VISC/parboil
+PARBOIL_ROOT = $(LLVM_SRC_ROOT)/tools/hpvm/test/parboil
 APP = bfs
 
 # Default compile visc
diff --git a/hpvm/test/parboil/benchmarks/bfs/src/visc/main.cpp b/hpvm/test/parboil/benchmarks/bfs/src/visc/main.cpp
index 1eec80bb2c78cb86efe3c71c83d2280e95104f5a..6227ef498f10eb82e685f4dab518caf17e7757ac 100644
--- a/hpvm/test/parboil/benchmarks/bfs/src/visc/main.cpp
+++ b/hpvm/test/parboil/benchmarks/bfs/src/visc/main.cpp
@@ -309,7 +309,45 @@ void BFS_Root(int *q1, size_t bytesq1,
   
 }
 
+void BFS_Wrapper(
+  int *q1, size_t bytesq1, // 0, 1
+  int *q2, size_t bytesq2, // 2, 3
+  struct Node *g_graph_nodes, size_t bytesg_graph_nodes, // 4, 5
+  struct Edge *g_graph_edges, size_t bytesg_graph_edges, // 6, 7
+  int *g_color, size_t bytesg_color, // 8, 9
+  int *g_cost, size_t bytesg_cost, // 10, 11
+  int *tail, size_t bytestail, // 12, 13
+  int no_of_nodes, int gray_shade, // 14, 15
+  int k, long block, long grid // 16 - 18
+) {
+  __visc__hint(visc::CPU_TARGET);
+  __visc__attributes(
+    6, q1, g_graph_nodes, g_graph_edges, g_color, g_cost, tail,
+    4, q2, g_color, g_cost, tail
+  );
+  void* BlockingBFSNode = __visc__createNodeND(0, BFS_Root);
 
+  // Bind edges
+  __visc__bindIn(BlockingBFSNode, 0, 0, 0); // Bind q1
+  __visc__bindIn(BlockingBFSNode, 1, 1, 0); // Bind bytes_q1
+  __visc__bindIn(BlockingBFSNode, 2, 2, 0); // Bind q2
+  __visc__bindIn(BlockingBFSNode, 3, 3, 0); // Bind bytes_q2 
+  __visc__bindIn(BlockingBFSNode, 4, 4, 0); // Bind graph_nodes
+  __visc__bindIn(BlockingBFSNode, 5, 5, 0); // Bind bytes_graph_nodes
+  __visc__bindIn(BlockingBFSNode, 6, 6, 0); // Bind graph_edges
+  __visc__bindIn(BlockingBFSNode, 7, 7, 0); // Bind bytes_graph_edges
+  __visc__bindIn(BlockingBFSNode, 8, 8, 0); // Bind color
+  __visc__bindIn(BlockingBFSNode, 9, 9, 0); // Bind bytes_color
+  __visc__bindIn(BlockingBFSNode, 10, 10, 0); // Bind cost
+  __visc__bindIn(BlockingBFSNode, 11, 11, 0); // Bind bytes_cost
+  __visc__bindIn(BlockingBFSNode, 12, 12, 0); // Bind tail
+  __visc__bindIn(BlockingBFSNode, 13, 13, 0); // Bind bytes_tail
+  __visc__bindIn(BlockingBFSNode, 14, 14, 0); // Bind no_of_nodes
+  __visc__bindIn(BlockingBFSNode, 15, 15, 0); // Bind gray_shade
+  __visc__bindIn(BlockingBFSNode, 16, 16, 0); // Bind k
+  __visc__bindIn(BlockingBFSNode, 17, 17, 0); // Bind block 
+  __visc__bindIn(BlockingBFSNode, 18, 18, 0); // Bind grid 
+}
 
 FILE *fp;
 char* readFile(const char* fileName)
@@ -509,7 +547,7 @@ int main( int argc, char** argv)
                                 //gray, 
                                 //k,
                                 //0);
-    void* bfsDFG = __visc__launch(0, BFS_Root, (void*) args);
+    void* bfsDFG = __visc__launch(0, BFS_Wrapper, (void*) args);
     __visc__wait(bfsDFG);
     // Swap q1 and q2
     // Swap q1 and q2
diff --git a/hpvm/test/parboil/benchmarks/cutcp/Makefile b/hpvm/test/parboil/benchmarks/cutcp/Makefile
index d00d743bb59e5dd22abc713cad1483e316d4acb0..5e56793360aa479f604883f63b41a3ab8bb0cc58 100644
--- a/hpvm/test/parboil/benchmarks/cutcp/Makefile
+++ b/hpvm/test/parboil/benchmarks/cutcp/Makefile
@@ -1,4 +1,4 @@
-PARBOIL_ROOT = $(LLVM_SRC_ROOT)/test/VISC/parboil
+PARBOIL_ROOT = $(LLVM_SRC_ROOT)/tools/hpvm/test/parboil
 APP = cutcp
 
 # Default compile visc
diff --git a/hpvm/test/parboil/benchmarks/cutcp/src/visc/main.cpp b/hpvm/test/parboil/benchmarks/cutcp/src/visc/main.cpp
index 33dd2d68ca4c98b5ef877cfcfff3f243176c0010..c26621737c4c5979d863ccb7b42a8d4132f1b5c1 100644
--- a/hpvm/test/parboil/benchmarks/cutcp/src/visc/main.cpp
+++ b/hpvm/test/parboil/benchmarks/cutcp/src/visc/main.cpp
@@ -50,6 +50,8 @@ typedef struct __attribute__((__packed__)) __attribute__((aligned(16))){
     float w;
 } float4;
 
+extern float rsqrt(float x);
+
 void Allocation(long block) {
     // Memory shared between threadblocks
     size_t bytes_AtomBinCache = sizeof(float)*BIN_CACHE_MAXLEN * BIN_DEPTH * 4;
@@ -121,9 +123,9 @@ void CUTCPLeaf(
     int numbins;
 
     /* bin number determined by center of region */
-    myBinIndex[0] = (int) __visc__floor((8 * xRegionIndex + 4) * h * BIN_INVLEN);
-    myBinIndex[1] = (int) __visc__floor((8 * yRegionIndex + 4) * h * BIN_INVLEN);
-    myBinIndex[2] = (int) __visc__floor((8 * zRegionIndex + 4) * h * BIN_INVLEN);
+    myBinIndex[0] = (int) floor((8 * xRegionIndex + 4) * h * BIN_INVLEN);
+    myBinIndex[1] = (int) floor((8 * yRegionIndex + 4) * h * BIN_INVLEN);
+    myBinIndex[2] = (int) floor((8 * zRegionIndex + 4) * h * BIN_INVLEN);
 
     /* first neighbor in list for me to cache */
     nbrid = (tid >> 4);
@@ -194,7 +196,7 @@ void CUTCPLeaf(
                 if (r2 < cutoff2)
                 {
                     float s = (1.f - r2 * inv_cutoff2);
-                    energy0 += aq * __visc__rsqrt(r2) * s * s;
+                    energy0 += aq * rsqrt(r2) * s * s;
                     //energy0 += aq * (1.0/__visc__sqrt(r2)) * s * s;
                 }
 #else
@@ -207,7 +209,7 @@ void CUTCPLeaf(
                 if (r2 < cutoff2)
                 {
                     float s = (1.f - r2 * inv_cutoff2);
-                    energy1 += aq * __visc__rsqrt(r2) * s * s;
+                    energy1 += aq * rsqrt(r2) * s * s;
                     //energy1 += aq * (1.0/__visc__sqrt(r2)) * s * s;
                 }
 #else
@@ -219,7 +221,7 @@ void CUTCPLeaf(
                 if (r2 < cutoff2)
                 {
                     float s = (1.f - r2 * inv_cutoff2);
-                    energy2 += aq * __visc__rsqrt(r2) * s * s;
+                    energy2 += aq * rsqrt(r2) * s * s;
                     //energy2 += aq * (1.0/__visc__sqrt(r2)) * s * s;
                 }
 #else
@@ -231,8 +233,8 @@ void CUTCPLeaf(
                 if (r2 < cutoff2)
                 {
                     float s = (1.f - r2 * inv_cutoff2);
-                    energy3 += aq * __visc__rsqrt(r2) * s * s;
-                    //energy3 += aq * (1.0/__visc__rsqrt(r2)) * s * s;
+                    energy3 += aq * rsqrt(r2) * s * s;
+                    //energy3 += aq * (1.0/rsqrt(r2)) * s * s;
                 }
 #else
                 energy3 += (r2 < cutoff2);
@@ -418,6 +420,55 @@ void CUTCPRoot(
 }
 
 
+void CUTCPWrapper(
+    int binDim_x,
+    int binDim_y,
+    float4 *binBaseAddr, size_t bytes_binBaseAddr,
+    int offset,
+    float h,                /* lattice spacing */
+    float cutoff2,          /* square of cutoff distance */
+    float inv_cutoff2,
+    ener_t *regionZeroAddr, size_t bytes_regionZeroAddr, /* address of lattice regions starting at origin */
+    int zRegionIndex,
+    // constant memory arguments the next two
+    int *NbrListLen, size_t bytes_NbrListLen,
+    xyz *NbrList, size_t bytes_NbrList,
+    long blockx,
+    long blocky,
+    long blockz,
+    long gridx,
+    long gridy,
+    long gridz
+) {
+    __visc__hint(visc::CPU_TARGET);
+    __visc__attributes(4, binBaseAddr, regionZeroAddr, NbrListLen, NbrList, 1, regionZeroAddr);
+
+    void* BlockingCUTCPNode = __visc__createNodeND(0, CUTCPRoot);
+
+    // Bind Inputs
+    __visc__bindIn(BlockingCUTCPNode, 0, 0, 0); // Bind binDim_x
+    __visc__bindIn(BlockingCUTCPNode, 1, 1, 0); // Bind binDim_y
+    __visc__bindIn(BlockingCUTCPNode, 2, 2, 0); // Bind binBaseAddr
+    __visc__bindIn(BlockingCUTCPNode, 3, 3, 0); // Bind bytes_binBaseAddr
+    __visc__bindIn(BlockingCUTCPNode, 4, 4, 0); // Bind offset
+    __visc__bindIn(BlockingCUTCPNode, 5, 5, 0); // Bind h
+    __visc__bindIn(BlockingCUTCPNode, 6, 6, 0); // Bind cutoff2
+    __visc__bindIn(BlockingCUTCPNode, 7, 7, 0); // Bind inv_cutoff2
+    __visc__bindIn(BlockingCUTCPNode, 8, 8, 0); // Bind regionZeroAddr
+    __visc__bindIn(BlockingCUTCPNode, 9, 9, 0); // Bind bytes_regionZeroAddr
+    __visc__bindIn(BlockingCUTCPNode, 10, 10, 0); // Bind zRegionIndex
+    __visc__bindIn(BlockingCUTCPNode, 11, 11, 0); // Bind NbrListLen
+    __visc__bindIn(BlockingCUTCPNode, 12, 12, 0); // Bind bytes_NbrListLen
+    __visc__bindIn(BlockingCUTCPNode, 13, 13, 0); // Bind NbrList
+    __visc__bindIn(BlockingCUTCPNode, 14, 14, 0); // Bind bytes_NbrList
+    __visc__bindIn(BlockingCUTCPNode, 15, 15, 0); // Bind blockx
+    __visc__bindIn(BlockingCUTCPNode, 16, 16, 0); // Bind blocky
+    __visc__bindIn(BlockingCUTCPNode, 17, 17, 0); // Bind blockz
+    __visc__bindIn(BlockingCUTCPNode, 18, 18, 0); // Bind gridx
+    __visc__bindIn(BlockingCUTCPNode, 19, 19, 0); // Bind gridy
+    __visc__bindIn(BlockingCUTCPNode, 20, 20, 0); // Bind gridz
+}
+
 // ==================== Host Code ==============================
 
 int gpu_compute_cutoff_potential_lattice6overlap(
@@ -938,7 +989,7 @@ int gpu_compute_cutoff_potential_lattice6overlap(
 
         args->zRegionIndex = zRegionIndex;
 
-        CUTCP_DFG = __visc__launch(0, CUTCPRoot, (void*)args);
+        CUTCP_DFG = __visc__launch(0, CUTCPWrapper, (void*)args);
         __visc__wait(CUTCP_DFG);
         //llvm_visc_request_mem(regionZeroAddr, lnall*sizeof(ener_t));
     }
diff --git a/hpvm/test/parboil/benchmarks/hpvm-cava/src/main.c b/hpvm/test/parboil/benchmarks/hpvm-cava/src/main.c
index e43bbb4f25c4c97c9907ebae37251c854860c3b5..c1c0130b4c2c0ec6ec7e792c72323b03a4d508a5 100644
--- a/hpvm/test/parboil/benchmarks/hpvm-cava/src/main.c
+++ b/hpvm/test/parboil/benchmarks/hpvm-cava/src/main.c
@@ -17,6 +17,11 @@ int NUM_CLASSES;
 int INPUT_DIM;
 int NUM_WORKER_THREADS;
 
+// Type of struct holding the return value from the last node.
+struct RetStruct {
+  size_t bytesRet;
+}; 
+
 // Type of struct that is used to pass arguments to the HPVM dataflow graph
 // using the hpvm launch operation
 typedef struct __attribute__((__packed__)) {
@@ -34,7 +39,8 @@ typedef struct __attribute__((__packed__)) {
     float*coefs; size_t bytes_coefs;
     float *l2_dist; size_t bytes_l2_dist;
     float *tone_map; size_t bytes_tone_map;
-    size_t row_size; size_t col_size;
+    int row_size; int col_size;
+    struct RetStruct ret; // Instance of RetStruct holding the return value.
 } 
 RootIn;
 
@@ -807,6 +813,10 @@ int main(int argc, char* argv[]) {
     __visc__wait(camPipeDFG);
 
     printf("\n\nPipeline execution completed!\n");
+    printf(
+      "Pipeline final stage returned %lu; should be %lu\n",
+      rootArgs->ret.bytesRet, bytes_image
+    );
     printf("\n\nRequesting memory!\n");
 
     // Request data from graph.    
diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/visc_sh/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/visc_sh/main.cc
index 161ec4505707e050ed8700a700e44c9a882049e7..16f2341a2203e3510b9c00a91eedd3ac53d296d4 100644
--- a/hpvm/test/parboil/benchmarks/sgemm/src/visc_sh/main.cc
+++ b/hpvm/test/parboil/benchmarks/sgemm/src/visc_sh/main.cc
@@ -187,19 +187,13 @@ void SgemmTB(float *A, size_t bytesA,
 }
 
 // Root node for sgemm - Creates work group node
-void SgemmRoot(float *A, size_t bytesA,
-               int lda,
-               float *B, size_t bytesB,
-               int ldb,
-               float *C, size_t bytesC,
-               int ldc,
-               int k,
-               float alpha,
-               float beta,
-               long block_x,
-               long block_y,
-               long grid_x,
-               long grid_y) {
+void SgemmRoot(
+    float *A, size_t bytesA, int lda, // 0-2
+    float *B, size_t bytesB, int ldb, // 3-5
+    float *C, size_t bytesC, int ldc, // 6-8
+    int k, float alpha, float beta, // 9-11
+    long block_x, long block_y, long grid_x, long grid_y // 12-15
+) {
     __visc__hint(visc::CPU_TARGET);
     __visc__attributes(3, A, B, C, 1, C);
     void* SgemmTBNode = __visc__createNodeND(2, SgemmTB, grid_x, grid_y);
@@ -222,6 +216,36 @@ void SgemmRoot(float *A, size_t bytesA,
 
 }
 
+void SgemmWrapper(
+    float *A, size_t bytesA, int lda, // 0-2
+    float *B, size_t bytesB, int ldb, // 3-5
+    float *C, size_t bytesC, int ldc, // 6-8
+    int k, float alpha, float beta, // 9-11
+    long block_x, long block_y, long grid_x, long grid_y // 12-15
+) {
+    __visc__hint(visc::CPU_TARGET);
+    __visc__attributes(3, A, B, C, 1, C);
+    void* SgemmRootNode = __visc__createNodeND(0, SgemmRoot);
+
+    // Bind edges
+    __visc__bindIn(SgemmRootNode, 0, 0, 0); // Bind A
+    __visc__bindIn(SgemmRootNode, 1, 1, 0); // Bind bytesA
+    __visc__bindIn(SgemmRootNode, 2, 2, 0); // Bind lda
+    __visc__bindIn(SgemmRootNode, 3, 3, 0); // Bind B
+    __visc__bindIn(SgemmRootNode, 4, 4, 0); // Bind bytesB
+    __visc__bindIn(SgemmRootNode, 5, 5, 0); // Bind ldb
+    __visc__bindIn(SgemmRootNode, 6, 6, 0); // Bind C
+    __visc__bindIn(SgemmRootNode, 7, 7, 0); // Bind bytesC
+    __visc__bindIn(SgemmRootNode, 8, 8, 0); // Bind ldc
+    __visc__bindIn(SgemmRootNode, 9, 9, 0); // Bind k
+    __visc__bindIn(SgemmRootNode, 10, 10, 0); // Bind alpha
+    __visc__bindIn(SgemmRootNode, 11, 11, 0); // Bind beta
+    __visc__bindIn(SgemmRootNode, 12, 12, 0); // Bind block_x
+    __visc__bindIn(SgemmRootNode, 13, 13, 0); // Bind block_y
+    __visc__bindIn(SgemmRootNode, 14, 14, 0); // Bind grid_x
+    __visc__bindIn(SgemmRootNode, 15, 15, 0); // Bind grid_y
+}
+
 // Creates root node for sgemm
 __attribute__((noinline)) void basicSgemm(struct pb_TimerSet* timers, char transa, char transb, int m, int n, int k, float alpha, float* A, size_t bytesA, int lda, float* B, size_t bytesB, int ldb, float beta, float* C, size_t bytesC, int ldc )
 {
@@ -269,7 +293,7 @@ __attribute__((noinline)) void basicSgemm(struct pb_TimerSet* timers, char trans
             );
 
     pb_SwitchToTimer( timers, visc_TimerID_COMPUTATION );
-    void* sgemmDFG = __visc__launch(0, SgemmRoot, (void*) args);
+    void* sgemmDFG = __visc__launch(0, SgemmWrapper, (void*) args);
 
     __visc__wait(sgemmDFG);
     pb_SwitchToTimer( timers, pb_TimerID_COMPUTE );
diff --git a/hpvm/test/parboil/benchmarks/tpacf/Makefile b/hpvm/test/parboil/benchmarks/tpacf/Makefile
index 0325de5b15be8dc682e8ef472d2b9a84e9a3729e..6140acd5ac3a196c8750b997c2e5904ba9585839 100644
--- a/hpvm/test/parboil/benchmarks/tpacf/Makefile
+++ b/hpvm/test/parboil/benchmarks/tpacf/Makefile
@@ -1,4 +1,4 @@
-PARBOIL_ROOT = $(LLVM_SRC_ROOT)/test/VISC/parboil
+PARBOIL_ROOT = $(LLVM_SRC_ROOT)/tools/hpvm/test/parboil
 APP = tpacf
 
 # Default compile visc
diff --git a/hpvm/test/parboil/benchmarks/tpacf/src/visc/main.cc b/hpvm/test/parboil/benchmarks/tpacf/src/visc/main.cc
index be44d864409cb3f3b8f4799df0387441ca89785d..d1482d732947aefc2f3eafb380f584680e692f7f 100644
--- a/hpvm/test/parboil/benchmarks/tpacf/src/visc/main.cc
+++ b/hpvm/test/parboil/benchmarks/tpacf/src/visc/main.cc
@@ -232,10 +232,7 @@ void BlockingTPACF(hist_t* histograms, size_t bytes_histograms,
                    // next arg is read-only constant
                    float* binb, size_t bytes_binb,
                    int NUM_SETS, int NUM_ELEMENTS,
-                   long block,
-                   // shared memory args
-                   struct cartesian* data_s, size_t bytes_data_s,
-                   unsigned int* warp_hists, size_t bytes_warp_hists) {
+                   long block) {
 
   __visc__hint(visc::CPU_TARGET);
   __visc__attributes(2, all_x_data, binb, 1, histograms);
@@ -286,6 +283,32 @@ void TPACFRoot(hist_t* histograms, size_t bytes_histograms,
 
 }
 
+void TPACFWrapper(
+  hist_t* histograms, size_t bytes_histograms,
+  float* all_x_data, size_t bytes_all_data,
+  // next arg is read-only constant
+  float* binb, size_t bytes_binb,
+  int NUM_SETS, int NUM_ELEMENTS,
+  long block, long grid
+) {
+  __visc__hint(visc::CPU_TARGET);
+  __visc__attributes(2, all_x_data, binb, 1, histograms);
+
+  void* BlockingTPACFNode = __visc__createNodeND(0, TPACFRoot);
+
+  // Bind Inputs
+  __visc__bindIn(BlockingTPACFNode, 0, 0, 0); // Bind histograms
+  __visc__bindIn(BlockingTPACFNode, 1, 1, 0); // Bind bytes_histograms
+  __visc__bindIn(BlockingTPACFNode, 2, 2, 0); // Bind all_x_data
+  __visc__bindIn(BlockingTPACFNode, 3, 3, 0); // Bind bytes_all_data
+  __visc__bindIn(BlockingTPACFNode, 4, 4, 0); // Bind binb
+  __visc__bindIn(BlockingTPACFNode, 5, 5, 0); // Bind bytes_binb
+  __visc__bindIn(BlockingTPACFNode, 6, 6, 0); // Bind NUM_SETS
+  __visc__bindIn(BlockingTPACFNode, 7, 7, 0); // Bind NUM_ELEMENTS
+  __visc__bindIn(BlockingTPACFNode, 8, 8, 0); // Bind block
+  __visc__bindIn(BlockingTPACFNode, 9, 9, 0); // Bind grid
+}
+
 // **===-----------------------------------------------------------===**
 
 int