36 #include "specializations/warp_reduce_shfl.cuh"
37 #include "specializations/warp_reduce_smem.cuh"
38 #include "../thread/thread_operators.cuh"
39 #include "../util_arch.cuh"
40 #include "../util_type.cuh"
41 #include "../util_namespace.cuh"
139 int LOGICAL_WARP_THREADS = CUB_PTX_WARP_THREADS,
140 int PTX_ARCH = CUB_PTX_ARCH>
152 IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)),
160 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
164 WarpReduceShfl<T, LOGICAL_WARP_THREADS, PTX_ARCH>,
165 WarpReduceSmem<T, LOGICAL_WARP_THREADS, PTX_ARCH> >::Type InternalWarpReduce;
167 #endif // DOXYGEN_SHOULD_SKIP_THIS
173 typedef typename InternalWarpReduce::TempStorage _TempStorage;
181 _TempStorage &temp_storage;
206 temp_storage(temp_storage.Alias())
251 __device__ __forceinline__ T
Sum(
// Full-warp sum: forwards input to InternalWarpReduce::Reduce with LOGICAL_WARP_THREADS as
// the item count and cub::Sum() as the operator, returning the result unchanged.
// The `template` disambiguator is required: InternalWarpReduce is a dependent type, so
// without it `Reduce <` is parsed as a less-than comparison. The SegmentedReduce calls
// in this file already spell it this way.
254 return InternalWarpReduce(temp_storage).template Reduce<true, 1>(
input, LOGICAL_WARP_THREADS, cub::Sum());
295 __device__ __forceinline__ T
Sum(
// Partial-warp sum: forwards input to InternalWarpReduce::Reduce with the caller-supplied
// valid_items as the item count and cub::Sum() as the operator, returning the result unchanged.
// The `template` disambiguator is required: InternalWarpReduce is a dependent type, so
// without it `Reduce <` is parsed as a less-than comparison. The SegmentedReduce calls
// in this file already spell it this way.
300 return InternalWarpReduce(temp_storage).template Reduce<false, 1>(
input, valid_items, cub::Sum());
444 template <
typename ReductionOp>
447 ReductionOp reduction_op)
// Full-warp reduction: forwards input to InternalWarpReduce::Reduce with
// LOGICAL_WARP_THREADS as the item count and the caller's reduction_op as the operator.
// The `template` disambiguator is required: InternalWarpReduce is a dependent type, so
// without it `Reduce <` is parsed as a less-than comparison. The SegmentedReduce calls
// in this file already spell it this way.
449 return InternalWarpReduce(temp_storage).template Reduce<true, 1>(
input, LOGICAL_WARP_THREADS, reduction_op);
493 template <
typename ReductionOp>
496 ReductionOp reduction_op,
// Partial-warp reduction: forwards input to InternalWarpReduce::Reduce with the
// caller-supplied valid_items as the item count and reduction_op as the operator.
// The `template` disambiguator is required: InternalWarpReduce is a dependent type, so
// without it `Reduce <` is parsed as a less-than comparison. The SegmentedReduce calls
// in this file already spell it this way.
499 return InternalWarpReduce(temp_storage).template Reduce<false, 1>(
input, valid_items, reduction_op);
543 typename ReductionOp,
548 ReductionOp reduction_op)
// Builds a temporary InternalWarpReduce around temp_storage and hands input, head_flag and
// reduction_op to its SegmentedReduce member template, returning that result unchanged.
// The boolean template argument is `true` here; the call that takes tail_flag passes `false`.
550 return InternalWarpReduce(temp_storage).template SegmentedReduce<true>(input, head_flag, reduction_op);
594 typename ReductionOp,
599 ReductionOp reduction_op)
// Builds a temporary InternalWarpReduce around temp_storage and hands input, tail_flag and
// reduction_op to its SegmentedReduce member template, returning that result unchanged.
// The boolean template argument is `false` here; the call that takes head_flag passes `true`.
601 return InternalWarpReduce(temp_storage).template SegmentedReduce<false>(input, tail_flag, reduction_op);