36 #include "specializations/warp_scan_shfl.cuh"
37 #include "specializations/warp_scan_smem.cuh"
38 #include "../thread/thread_operators.cuh"
39 #include "../util_arch.cuh"
40 #include "../util_type.cuh"
41 #include "../util_namespace.cuh"
144 int LOGICAL_WARP_THREADS = CUB_PTX_WARP_THREADS,
145 int PTX_ARCH = CUB_PTX_ARCH>
157 IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)),
160 IS_POW_OF_TWO = ((LOGICAL_WARP_THREADS & (LOGICAL_WARP_THREADS - 1)) == 0),
163 IS_INTEGER = ((Traits<T>::CATEGORY == SIGNED_INTEGER) || (Traits<T>::CATEGORY == UNSIGNED_INTEGER))
168 WarpScanShfl<T, LOGICAL_WARP_THREADS, PTX_ARCH>,
169 WarpScanSmem<T, LOGICAL_WARP_THREADS, PTX_ARCH> >::Type InternalWarpScan;
172 typedef typename InternalWarpScan::TempStorage _TempStorage;
180 _TempStorage &temp_storage;
206 temp_storage(temp_storage.Alias()),
207 lane_id(IS_ARCH_WARP ?
209 LaneId() % LOGICAL_WARP_THREADS)
258 InternalWarpScan(temp_storage).InclusiveScan(input, output,
cub::Sum());
302 InternalWarpScan(temp_storage).InclusiveScan(input, output,
cub::Sum(), warp_aggregate);
353 InternalWarpScan(temp_storage).ExclusiveScan(input, output, ZeroInitialize<T>(),
cub::Sum());
398 InternalWarpScan(temp_storage).ExclusiveScan(input, output, ZeroInitialize<T>(),
cub::Sum(), warp_aggregate);
444 template <
typename ScanOp>
450 InternalWarpScan(temp_storage).InclusiveScan(input, output, scan_op);
494 template <
typename ScanOp>
501 InternalWarpScan(temp_storage).InclusiveScan(input, output, scan_op, warp_aggregate);
547 template <
typename ScanOp>
554 InternalWarpScan(temp_storage).ExclusiveScan(input, output, identity, scan_op);
597 template <
typename ScanOp>
605 InternalWarpScan(temp_storage).ExclusiveScan(input, output, identity, scan_op, warp_aggregate);
653 template <
typename ScanOp>
659 InternalWarpScan(temp_storage).ExclusiveScan(input, output, scan_op);
702 template <
typename ScanOp>
709 InternalWarpScan(temp_storage).ExclusiveScan(input, output, scan_op, warp_aggregate);
759 __device__ __forceinline__
void Sum(
764 InternalWarpScan(temp_storage).Scan(input, inclusive_output, exclusive_output, ZeroInitialize<T>(),
cub::Sum());
807 template <
typename ScanOp>
808 __device__ __forceinline__
void Scan(
815 InternalWarpScan(temp_storage).Scan(input, inclusive_output, exclusive_output, identity, scan_op);
858 template <
typename ScanOp>
859 __device__ __forceinline__
void Scan(
865 InternalWarpScan(temp_storage).Scan(input, inclusive_output, exclusive_output, scan_op);
912 unsigned int src_lane)
914 return InternalWarpScan(temp_storage).Broadcast(input, src_lane);