diff --git a/llvm/test/VISC/parboil/benchmarks/bfs/src/visc_base/main.cpp b/llvm/test/VISC/parboil/benchmarks/bfs/src/visc_base/main.cpp index 1f9a8d3ef873e610b0919e2f18566959eb3773fb..b5d7afc477cf7c85f88f0e9688c758fe2b66774b 100644 --- a/llvm/test/VISC/parboil/benchmarks/bfs/src/visc_base/main.cpp +++ b/llvm/test/VISC/parboil/benchmarks/bfs/src/visc_base/main.cpp @@ -140,8 +140,7 @@ BFSLeaf(int *q1, size_t bytesq1, ) { - //__visc__hint(visc::GPU_TARGET); - __visc__hint(visc::SPIR_TARGET); + __visc__hint(visc::DEVICE); __visc__attributes(6, q1, g_graph_nodes, g_graph_edges, g_color, g_cost, tail, 4, q2, g_color, g_cost, tail); @@ -233,7 +232,7 @@ void BlockingBFS(int *q1, size_t bytesq1, int *local_q_tail, size_t byteslocal_q_tail, int *local_q, size_t byteslocal_q, int *shift, size_t bytesshift) { - __visc__hint(visc::GPU_TARGET); + __visc__hint(visc::CPU_TARGET); __visc__attributes(6, q1, g_graph_nodes, g_graph_edges, g_color, g_cost, tail, 4, q2, g_color, g_cost, tail); diff --git a/llvm/test/VISC/parboil/benchmarks/cutcp/src/visc/main.cpp b/llvm/test/VISC/parboil/benchmarks/cutcp/src/visc/main.cpp index 10fd7a2a04d6c5fe38c88ef0953f000dbbbc9de6..0297d39f8b0bd2db32fc6ac1af4d6b926b50189f 100644 --- a/llvm/test/VISC/parboil/benchmarks/cutcp/src/visc/main.cpp +++ b/llvm/test/VISC/parboil/benchmarks/cutcp/src/visc/main.cpp @@ -78,8 +78,7 @@ void CUTCPLeaf( int* myBinIndex, size_t bytes_myBinIndex ) { - //__visc__hint(visc::GPU_TARGET); - __visc__hint(visc::SPIR_TARGET); + __visc__hint(visc::DEVICE); __visc__attributes(4, binBaseAddr, regionZeroAddr, NbrListLen, NbrList, 1, regionZeroAddr); void* thisNode = __visc__getNode(); diff --git a/llvm/test/VISC/parboil/benchmarks/histo/src/visc/main.cpp b/llvm/test/VISC/parboil/benchmarks/histo/src/visc/main.cpp index 348f04e0a092cbc50bb21595f128e2dcdc113bee..bb154e097268b646ab63953d45b69116ea581fb0 100644 --- a/llvm/test/VISC/parboil/benchmarks/histo/src/visc/main.cpp +++ b/llvm/test/VISC/parboil/benchmarks/histo/src/visc/main.cpp @@ -60,7 +60,7 @@ void PrescanLeaf (unsigned int* input, size_t bytes_input, float* StdDev, size_t bytes_StdDev) { - __visc__hint(visc::GPU_TARGET); + __visc__hint(visc::DEVICE); __visc__attributes(2, input, minmax, 1, minmax); void* thisNode = __visc__getNode(); @@ -145,7 +145,7 @@ void PrescanBlock (unsigned int* input, size_t bytes_input, unsigned int* minmax, size_t bytes_minmax, int block) { - __visc__hint(visc::GPU_TARGET); + __visc__hint(visc::CPU_TARGET); __visc__attributes(2, input, minmax, 1, minmax); void* AllocationNode = __visc__createNode(PrescanAllocation); @@ -257,7 +257,7 @@ void IntermediatesLeaf( uint2 *input, size_t bytes_input, unsigned int input_pitch, uchar4 *sm_mappings, size_t bytes_sm_mappings) { - __visc__hint(visc::GPU_TARGET); + __visc__hint(visc::DEVICE); __visc__attributes(1, input, 1, sm_mappings); void* thisNode = __visc__getNode(); @@ -302,7 +302,7 @@ void IntermediatesBlock(uint2 *input, size_t bytes_input, uchar4 *sm_mappings, size_t bytes_sm_mappings, int block) { - __visc__hint(visc::GPU_TARGET); + __visc__hint(visc::CPU_TARGET); __visc__attributes(1, input, 1, sm_mappings); void* IntermediatesLeafNode = __visc__createNode1D(IntermediatesLeaf, block); @@ -513,7 +513,7 @@ void MainLeaf ( uchar4* sm_mappings, size_t bytes_sm_mappings, * and also scans every N-th line for interesting data. N = gridDim.x */ - __visc__hint(visc::GPU_TARGET); + __visc__hint(visc::DEVICE); __visc__attributes(1, sm_mappings, 3, global_subhisto, global_histo, global_overflow); void* thisNode = __visc__getNode(); @@ -596,7 +596,7 @@ void MainBlock (uchar4* sm_mappings, size_t bytes_sm_mappings, int blockx, int blocky, int gridx, int gridy) { - __visc__hint(visc::GPU_TARGET); + __visc__hint(visc::CPU_TARGET); __visc__attributes(1, sm_mappings, 3, global_subhisto, global_histo, global_overflow); void* AllocationNode = __visc__createNode(MainAllocation); @@ -637,7 +637,7 @@ void MainRoot (uchar4* sm_mappings, size_t bytes_sm_mappings, int blockx, int blocky, int gridx, int gridy) { - __visc__hint(visc::GPU_TARGET); + __visc__hint(visc::CPU_TARGET); __visc__attributes(1, sm_mappings, 3, global_subhisto, global_histo, global_overflow); void* MainBlockNode = __visc__createNode2D(MainBlock, gridx, gridy); @@ -741,7 +741,7 @@ void FinalLeaf ( unsigned int *global_overflow, size_t bytes_global_overflow, unsigned int *final_histo, size_t bytes_final_histo) //final output { - __visc__hint(visc::GPU_TARGET); + __visc__hint(visc::DEVICE); __visc__attributes(3, global_subhisto, global_histo, global_overflow, 1, final_histo); void* thisNode = __visc__getNode(); @@ -859,7 +859,7 @@ void FinalBlock( unsigned int *final_histo, size_t bytes_final_histo, //final output int block) { - __visc__hint(visc::GPU_TARGET); + __visc__hint(visc::CPU_TARGET); __visc__attributes(3, global_subhisto, global_histo, global_overflow, 1, final_histo); void* FinalLeafNode = __visc__createNode1D(FinalLeaf, block); @@ -891,7 +891,7 @@ void FinalRoot( int block, int grid) { - __visc__hint(visc::GPU_TARGET); + __visc__hint(visc::CPU_TARGET); __visc__attributes(3, global_subhisto, global_histo, global_overflow, 1, final_histo); void* FinalBlockNode = __visc__createNode1D(FinalBlock, grid); @@ -1055,7 +1055,6 @@ int main(int argc, char* argv[]) { llvm_visc_track_mem(input, bytes_input); llvm_visc_track_mem(ranges, bytes_ranges); llvm_visc_track_mem(sm_mappings, bytes_sm_mappings); - llvm_visc_track_mem(global_subhisto, bytes_global_subhisto); llvm_visc_track_mem(global_histo, bytes_global_histo); llvm_visc_track_mem(global_overflow, bytes_global_overflow); llvm_visc_track_mem(final_histo, bytes_final_histo); @@ -1151,7 +1150,9 @@ int main(int argc, char* argv[]) { numIterations = 1; #endif + //memset(global_subhisto, 0, img_width*histo_height*sizeof(unsigned int)); for (int iter = 0; iter < numIterations; iter++) { + llvm_visc_track_mem(global_subhisto, bytes_global_subhisto); pb_SwitchToSubTimer(&timers, postpremems , visc_TimerID_COMPUTATION); //llvm_visc_request_mem(ranges, bytes_ranges); //llvm_visc_request_mem(input, bytes_input); @@ -1186,10 +1187,10 @@ int main(int argc, char* argv[]) { #endif // Requesting only so that we can write it to zero. IT should not be // copied from device to host - //llvm_visc_request_mem(global_subhisto, bytes_global_subhisto); + llvm_visc_request_mem(global_subhisto, bytes_global_subhisto); // Set global_subhisto to zero + memset(global_subhisto, 0, bytes_global_subhisto); //pb_SwitchToSubTimer(&timers, memsets , visc_TimerID_COMPUTATION); - //memset(global_subhisto, 0, img_width*histo_height*sizeof(unsigned int)); pb_SwitchToSubTimer(&timers, intermediates, visc_TimerID_COMPUTATION); @@ -1228,6 +1229,7 @@ int main(int argc, char* argv[]) { __visc__wait(FinalDFG); pb_SwitchToSubTimer(&timers, viscOverhead, visc_TimerID_COMPUTATION); + llvm_visc_untrack_mem(global_subhisto); } pb_SwitchToTimer(&timers, pb_TimerID_COPY); diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc b/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc index 6e60ddd11ae4fe6a842b563ea9dfeaff2dd4fbb6..2a1bb56e6d9f869bec78109f0aead055305e23fb 100644 --- a/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc +++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc @@ -107,7 +107,7 @@ void Allocation(int block_x, int block_y) { void SgemmLeaf( float* A, size_t bytesA, int lda, float* B, size_t bytesB, int ldb, float* C, size_t bytesC, int ldc, int k, float alpha, float beta, float* shB, size_t bytesshB ) { - __visc__hint(visc::GPU_TARGET); + __visc__hint(visc::DEVICE); //__visc__hint(visc::SPIR_TARGET); // TODO: shB is not an in or out attribute __visc__attributes(3, A, B, C, 1, C); diff --git a/llvm/test/VISC/parboil/benchmarks/tpacf/src/visc/main.cc b/llvm/test/VISC/parboil/benchmarks/tpacf/src/visc/main.cc index 1158ffc04a03411ee5f34552f5ef8e74a27c50e0..4fe3cbf526d716d63319cb4d75a2e2c779cc3a79 100644 --- a/llvm/test/VISC/parboil/benchmarks/tpacf/src/visc/main.cc +++ b/llvm/test/VISC/parboil/benchmarks/tpacf/src/visc/main.cc @@ -90,7 +90,7 @@ void TPACFLeaf(hist_t* histograms, size_t bytes_histograms, //struct cartesian* data_s, size_t bytes_data_s, unsigned int* warp_hists, size_t bytes_warp_hists) { - __visc__hint(visc::GPU_TARGET); + __visc__hint(visc::DEVICE); __visc__attributes(2, all_x_data, binb, 1, histograms); void* thisNode = __visc__getNode(); @@ -237,7 +237,7 @@ void BlockingTPACF(hist_t* histograms, size_t bytes_histograms, struct cartesian* data_s, size_t bytes_data_s, unsigned int* warp_hists, size_t bytes_warp_hists) { - __visc__hint(visc::GPU_TARGET); + __visc__hint(visc::CPU_TARGET); __visc__attributes(2, all_x_data, binb, 1, histograms); void* AllocationNode = __visc__createNode(Allocation); diff --git a/llvm/test/VISC/parboil/common/include/visc.h b/llvm/test/VISC/parboil/common/include/visc.h index 29b5be348cca9d81ffc409aa1f342bfc519d7779..00ef97a7ff1362f252e04a70318e9d5957e781cf 100644 --- a/llvm/test/VISC/parboil/common/include/visc.h +++ b/llvm/test/VISC/parboil/common/include/visc.h @@ -6,7 +6,12 @@ *cr ***************************************************************************/ +#ifndef DEVICE +#define DEVICE GPU_TARGET +#endif + #include "llvm/SupportVISC/VISCHint.h" + #ifdef __cplusplus extern "C" { void __visc__hint(visc::Target);