diff --git a/src/CudaUtil.cu b/src/CudaUtil.cu index eb23592fa838570848c1fd57443dd01a0c0ca329..5068fe80f16696579318b445d9b44acfac076dc8 100644 --- a/src/CudaUtil.cu +++ b/src/CudaUtil.cu @@ -41,7 +41,8 @@ __inline__ __device__ uint __lanemask_lt() } __device__ int atomicAggInc(int *ctr, int warpLane) { - unsigned int active = __ballot_sync(0xFFFFFFFF, 1); + // unsigned int active = __ballot_sync(0xFFFFFFFF, 1); + unsigned int active = __activemask(); int leader = __ffs(active) - 1; int change = __popc(active); unsigned int rank = __popc(active & __lanemask_lt());