diff --git a/llvm/test/VISC/parboil/benchmarks/bfs/src/visc_base/main.cpp b/llvm/test/VISC/parboil/benchmarks/bfs/src/visc_base/main.cpp
index 1f9a8d3ef873e610b0919e2f18566959eb3773fb..b5d7afc477cf7c85f88f0e9688c758fe2b66774b 100644
--- a/llvm/test/VISC/parboil/benchmarks/bfs/src/visc_base/main.cpp
+++ b/llvm/test/VISC/parboil/benchmarks/bfs/src/visc_base/main.cpp
@@ -140,8 +140,7 @@ BFSLeaf(int *q1, size_t bytesq1,
            ) 
 {
 
-  //__visc__hint(visc::GPU_TARGET);
-  __visc__hint(visc::SPIR_TARGET);
+  __visc__hint(visc::DEVICE);
   __visc__attributes(6, q1, g_graph_nodes, g_graph_edges, g_color, g_cost, tail,
                       4, q2, g_color, g_cost, tail);
 
@@ -233,7 +232,7 @@ void BlockingBFS(int *q1, size_t bytesq1,
                 int *local_q_tail, size_t byteslocal_q_tail, 
                 int *local_q, size_t byteslocal_q,
                 int *shift, size_t bytesshift) {
-  __visc__hint(visc::GPU_TARGET);
+  __visc__hint(visc::CPU_TARGET);
   __visc__attributes(6, q1, g_graph_nodes, g_graph_edges, g_color, g_cost, tail,
                       4, q2, g_color, g_cost, tail);
 
diff --git a/llvm/test/VISC/parboil/benchmarks/cutcp/src/visc/main.cpp b/llvm/test/VISC/parboil/benchmarks/cutcp/src/visc/main.cpp
index 10fd7a2a04d6c5fe38c88ef0953f000dbbbc9de6..0297d39f8b0bd2db32fc6ac1af4d6b926b50189f 100644
--- a/llvm/test/VISC/parboil/benchmarks/cutcp/src/visc/main.cpp
+++ b/llvm/test/VISC/parboil/benchmarks/cutcp/src/visc/main.cpp
@@ -78,8 +78,7 @@ void CUTCPLeaf(
     int* myBinIndex, size_t bytes_myBinIndex
 )
 {
-    //__visc__hint(visc::GPU_TARGET);
-    __visc__hint(visc::SPIR_TARGET);
+    __visc__hint(visc::DEVICE);
     __visc__attributes(4, binBaseAddr, regionZeroAddr, NbrListLen, NbrList, 1, regionZeroAddr);
 
     void* thisNode = __visc__getNode();
diff --git a/llvm/test/VISC/parboil/benchmarks/histo/src/visc/main.cpp b/llvm/test/VISC/parboil/benchmarks/histo/src/visc/main.cpp
index 348f04e0a092cbc50bb21595f128e2dcdc113bee..bb154e097268b646ab63953d45b69116ea581fb0 100644
--- a/llvm/test/VISC/parboil/benchmarks/histo/src/visc/main.cpp
+++ b/llvm/test/VISC/parboil/benchmarks/histo/src/visc/main.cpp
@@ -60,7 +60,7 @@ void PrescanLeaf (unsigned int* input, size_t bytes_input,
                   float* StdDev, size_t bytes_StdDev) {
 
 
-    __visc__hint(visc::GPU_TARGET);
+    __visc__hint(visc::DEVICE);
     __visc__attributes(2, input, minmax, 1, minmax);
 
     void* thisNode = __visc__getNode();
@@ -145,7 +145,7 @@ void PrescanBlock (unsigned int* input, size_t bytes_input,
                    unsigned int* minmax, size_t bytes_minmax,
                    int block) {
 
-    __visc__hint(visc::GPU_TARGET);
+    __visc__hint(visc::CPU_TARGET);
     __visc__attributes(2, input, minmax, 1, minmax);
 
     void* AllocationNode = __visc__createNode(PrescanAllocation);
@@ -257,7 +257,7 @@ void IntermediatesLeaf( uint2 *input, size_t bytes_input,
                         unsigned int input_pitch,
                         uchar4 *sm_mappings, size_t bytes_sm_mappings) {
 
-    __visc__hint(visc::GPU_TARGET);
+    __visc__hint(visc::DEVICE);
     __visc__attributes(1, input, 1, sm_mappings);
 
     void* thisNode = __visc__getNode();
@@ -302,7 +302,7 @@ void IntermediatesBlock(uint2 *input, size_t bytes_input,
                         uchar4 *sm_mappings, size_t bytes_sm_mappings,
                         int block) {
 
-    __visc__hint(visc::GPU_TARGET);
+    __visc__hint(visc::CPU_TARGET);
     __visc__attributes(1, input, 1, sm_mappings);
 
     void* IntermediatesLeafNode = __visc__createNode1D(IntermediatesLeaf, block);
@@ -513,7 +513,7 @@ void MainLeaf ( uchar4* sm_mappings, size_t bytes_sm_mappings,
      * and also scans every N-th line for interesting data.  N = gridDim.x
      */
 
-    __visc__hint(visc::GPU_TARGET);
+    __visc__hint(visc::DEVICE);
     __visc__attributes(1, sm_mappings, 3, global_subhisto, global_histo, global_overflow);
 
     void* thisNode = __visc__getNode();
@@ -596,7 +596,7 @@ void MainBlock (uchar4* sm_mappings, size_t bytes_sm_mappings,
                 int blockx, int blocky,
                 int gridx, int gridy) {
 
-    __visc__hint(visc::GPU_TARGET);
+    __visc__hint(visc::CPU_TARGET);
     __visc__attributes(1, sm_mappings, 3, global_subhisto, global_histo, global_overflow);
 
     void* AllocationNode = __visc__createNode(MainAllocation);
@@ -637,7 +637,7 @@ void MainRoot (uchar4* sm_mappings, size_t bytes_sm_mappings,
                int blockx, int blocky,
                int gridx, int gridy) {
 
-    __visc__hint(visc::GPU_TARGET);
+    __visc__hint(visc::CPU_TARGET);
     __visc__attributes(1, sm_mappings, 3, global_subhisto, global_histo, global_overflow);
 
     void* MainBlockNode = __visc__createNode2D(MainBlock, gridx, gridy);
@@ -741,7 +741,7 @@ void FinalLeaf (
     unsigned int *global_overflow, size_t bytes_global_overflow,
     unsigned int *final_histo, size_t bytes_final_histo) //final output
 {
-    __visc__hint(visc::GPU_TARGET);
+    __visc__hint(visc::DEVICE);
     __visc__attributes(3, global_subhisto, global_histo, global_overflow, 1, final_histo);
 
     void* thisNode = __visc__getNode();
@@ -859,7 +859,7 @@ void FinalBlock(
     unsigned int *final_histo, size_t bytes_final_histo, //final output
     int block) {
 
-    __visc__hint(visc::GPU_TARGET);
+    __visc__hint(visc::CPU_TARGET);
     __visc__attributes(3, global_subhisto, global_histo, global_overflow, 1, final_histo);
     
     void* FinalLeafNode = __visc__createNode1D(FinalLeaf, block);
@@ -891,7 +891,7 @@ void FinalRoot(
     int block,
     int grid) {
 
-    __visc__hint(visc::GPU_TARGET);
+    __visc__hint(visc::CPU_TARGET);
     __visc__attributes(3, global_subhisto, global_histo, global_overflow, 1, final_histo);
     
     void* FinalBlockNode = __visc__createNode1D(FinalBlock, grid);
@@ -1055,7 +1055,6 @@ int main(int argc, char* argv[]) {
     llvm_visc_track_mem(input, bytes_input);
     llvm_visc_track_mem(ranges, bytes_ranges);
     llvm_visc_track_mem(sm_mappings, bytes_sm_mappings);
-    llvm_visc_track_mem(global_subhisto, bytes_global_subhisto);
     llvm_visc_track_mem(global_histo, bytes_global_histo);
     llvm_visc_track_mem(global_overflow, bytes_global_overflow);
     llvm_visc_track_mem(final_histo, bytes_final_histo);
@@ -1151,7 +1150,9 @@ int main(int argc, char* argv[]) {
     numIterations = 1;
 #endif
 
+        //memset(global_subhisto, 0, img_width*histo_height*sizeof(unsigned int));
     for (int iter = 0; iter < numIterations; iter++) {
+        llvm_visc_track_mem(global_subhisto, bytes_global_subhisto);
         pb_SwitchToSubTimer(&timers, postpremems , visc_TimerID_COMPUTATION);
         //llvm_visc_request_mem(ranges, bytes_ranges);
         //llvm_visc_request_mem(input, bytes_input);
@@ -1186,10 +1187,10 @@ int main(int argc, char* argv[]) {
 #endif
         // Requesting only so that we can write it to zero. IT should not be
         // copied from device to host
-        //llvm_visc_request_mem(global_subhisto, bytes_global_subhisto);
+        llvm_visc_request_mem(global_subhisto, bytes_global_subhisto);
         // Set global_subhisto to zero
+        memset(global_subhisto, 0, bytes_global_subhisto);
         //pb_SwitchToSubTimer(&timers, memsets , visc_TimerID_COMPUTATION);
-        //memset(global_subhisto, 0, img_width*histo_height*sizeof(unsigned int));
 
         pb_SwitchToSubTimer(&timers, intermediates, visc_TimerID_COMPUTATION);
 
@@ -1228,6 +1229,7 @@ int main(int argc, char* argv[]) {
         __visc__wait(FinalDFG);
 
         pb_SwitchToSubTimer(&timers, viscOverhead, visc_TimerID_COMPUTATION);
+        llvm_visc_untrack_mem(global_subhisto);
     }
 
     pb_SwitchToTimer(&timers, pb_TimerID_COPY);
diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc b/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc
index 6e60ddd11ae4fe6a842b563ea9dfeaff2dd4fbb6..2a1bb56e6d9f869bec78109f0aead055305e23fb 100644
--- a/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc
+++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc
@@ -107,7 +107,7 @@ void Allocation(int block_x, int block_y) {
 
 void SgemmLeaf( float* A, size_t bytesA, int lda, float* B, size_t bytesB, int ldb, float* C, size_t bytesC, int ldc, int k, float alpha, float beta, float* shB, size_t bytesshB )
 {
-    __visc__hint(visc::GPU_TARGET);
+    __visc__hint(visc::DEVICE);
     //__visc__hint(visc::SPIR_TARGET);
     // TODO: shB is not an in or out attribute
     __visc__attributes(3, A, B, C, 1, C);
diff --git a/llvm/test/VISC/parboil/benchmarks/tpacf/src/visc/main.cc b/llvm/test/VISC/parboil/benchmarks/tpacf/src/visc/main.cc
index 1158ffc04a03411ee5f34552f5ef8e74a27c50e0..4fe3cbf526d716d63319cb4d75a2e2c779cc3a79 100644
--- a/llvm/test/VISC/parboil/benchmarks/tpacf/src/visc/main.cc
+++ b/llvm/test/VISC/parboil/benchmarks/tpacf/src/visc/main.cc
@@ -90,7 +90,7 @@ void TPACFLeaf(hist_t* histograms, size_t bytes_histograms,
                //struct cartesian* data_s, size_t bytes_data_s,
                unsigned int* warp_hists, size_t bytes_warp_hists) {
 
-  __visc__hint(visc::GPU_TARGET);
+  __visc__hint(visc::DEVICE);
   __visc__attributes(2, all_x_data, binb, 1, histograms);
 
   void* thisNode = __visc__getNode();
@@ -237,7 +237,7 @@ void BlockingTPACF(hist_t* histograms, size_t bytes_histograms,
                    struct cartesian* data_s, size_t bytes_data_s,
                    unsigned int* warp_hists, size_t bytes_warp_hists) {
 
-  __visc__hint(visc::GPU_TARGET);
+  __visc__hint(visc::CPU_TARGET);
   __visc__attributes(2, all_x_data, binb, 1, histograms);
 
   void* AllocationNode = __visc__createNode(Allocation);
diff --git a/llvm/test/VISC/parboil/common/include/visc.h b/llvm/test/VISC/parboil/common/include/visc.h
index 29b5be348cca9d81ffc409aa1f342bfc519d7779..00ef97a7ff1362f252e04a70318e9d5957e781cf 100644
--- a/llvm/test/VISC/parboil/common/include/visc.h
+++ b/llvm/test/VISC/parboil/common/include/visc.h
@@ -6,7 +6,12 @@
  *cr
  ***************************************************************************/
 
+#ifndef DEVICE /* keep any definition of DEVICE made before this header is included */
+#define DEVICE GPU_TARGET /* default: benchmark code written as visc::DEVICE expands to visc::GPU_TARGET */
+#endif
+
 #include "llvm/SupportVISC/VISCHint.h"
+
 #ifdef __cplusplus
 extern "C" {
 void __visc__hint(visc::Target);