41 #include "dispatch/dispatch_reduce.cuh"
42 #include "dispatch/dispatch_reduce_by_key.cuh"
43 #include "../util_namespace.cuh"
137 typename InputIteratorT,
138 typename OutputIteratorT,
139 typename ReductionOpT,
143 void *d_temp_storage,
144 size_t &temp_storage_bytes,
146 OutputIteratorT d_out,
148 ReductionOpT reduction_op,
150 cudaStream_t stream = 0,
151 bool debug_synchronous =
false)
155 return DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT>::Dispatch(
214 typename InputIteratorT,
215 typename OutputIteratorT>
218 void *d_temp_storage,
219 size_t &temp_storage_bytes,
221 OutputIteratorT d_out,
223 cudaStream_t stream = 0,
224 bool debug_synchronous =
false)
227 typedef typename std::iterator_traits<InputIteratorT>::value_type T;
229 return DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, cub::Sum>::Dispatch(
281 typename InputIteratorT,
282 typename OutputIteratorT>
285 void *d_temp_storage,
286 size_t &temp_storage_bytes,
288 OutputIteratorT d_out,
290 cudaStream_t stream = 0,
291 bool debug_synchronous =
false)
294 typedef typename std::iterator_traits<InputIteratorT>::value_type T;
296 return DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, cub::Min>::Dispatch(
350 typename InputIteratorT,
351 typename OutputIteratorT>
354 void *d_temp_storage,
355 size_t &temp_storage_bytes,
357 OutputIteratorT d_out,
359 cudaStream_t stream = 0,
360 bool debug_synchronous =
false)
363 typedef typename std::iterator_traits<InputIteratorT>::value_type T;
366 ArgIndexInputIteratorT d_argmin_in(d_in);
367 KeyValuePair<OffsetT, T> init = {1, Traits<T>::Max()};
369 return DispatchReduce<ArgIndexInputIteratorT, OutputIteratorT, OffsetT, cub::ArgMin>::Dispatch(
421 typename InputIteratorT,
422 typename OutputIteratorT>
425 void *d_temp_storage,
426 size_t &temp_storage_bytes,
428 OutputIteratorT d_out,
430 cudaStream_t stream = 0,
431 bool debug_synchronous =
false)
434 typedef typename std::iterator_traits<InputIteratorT>::value_type T;
436 return DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, cub::Max>::Dispatch(
490 typename InputIteratorT,
491 typename OutputIteratorT>
494 void *d_temp_storage,
495 size_t &temp_storage_bytes,
497 OutputIteratorT d_out,
499 cudaStream_t stream = 0,
500 bool debug_synchronous =
false)
503 typedef typename std::iterator_traits<InputIteratorT>::value_type T;
506 ArgIndexInputIteratorT d_argmax_in(d_in);
507 KeyValuePair<OffsetT, T> init = {1, Traits<T>::Lowest()};
509 return DispatchReduce<ArgIndexInputIteratorT, OutputIteratorT, OffsetT, cub::ArgMax>::Dispatch(
604 typename KeysInputIteratorT,
605 typename UniqueOutputIteratorT,
606 typename ValuesInputIteratorT,
607 typename AggregatesOutputIteratorT,
608 typename NumRunsOutputIteratorT,
609 typename ReductionOpT>
610 CUB_RUNTIME_FUNCTION __forceinline__
612 void *d_temp_storage,
613 size_t &temp_storage_bytes,
614 KeysInputIteratorT d_keys_in,
615 UniqueOutputIteratorT d_unique_out,
616 ValuesInputIteratorT d_values_in,
617 AggregatesOutputIteratorT d_aggregates_out,
618 NumRunsOutputIteratorT d_num_runs_out,
619 ReductionOpT reduction_op,
621 cudaStream_t stream = 0,
622 bool debug_synchronous =
false)
625 typedef NullType* FlagIterator;
626 typedef NullType SelectOp;
629 return DispatchReduceByKey<KeysInputIteratorT, UniqueOutputIteratorT, ValuesInputIteratorT, AggregatesOutputIteratorT, NumRunsOutputIteratorT, EqualityOp, ReductionOpT, OffsetT>::Dispatch(