diff --git a/src/CudaUtil.cu b/src/CudaUtil.cu
index eb23592fa838570848c1fd57443dd01a0c0ca329..5068fe80f16696579318b445d9b44acfac076dc8 100644
--- a/src/CudaUtil.cu
+++ b/src/CudaUtil.cu
@@ -41,7 +41,8 @@ __inline__ __device__ uint __lanemask_lt()
 }
 __device__ int atomicAggInc(int *ctr, int warpLane) 
 {
-    unsigned int active = __ballot_sync(0xFFFFFFFF, 1);
+    // Was __ballot_sync(0xFFFFFFFF, 1); __activemask() instead reports only the lanes active at this call.
+    unsigned int active = __activemask();
     int leader = __ffs(active) - 1;
     int change = __popc(active);
     unsigned int rank = __popc(active & __lanemask_lt());