CUB
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups
device_reduce.cuh
Go to the documentation of this file.
1 
2 /******************************************************************************
3  * Copyright (c) 2011, Duane Merrill. All rights reserved.
4  * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
35 #pragma once
36 
37 #include <stdio.h>
38 #include <iterator>
39 #include <limits>
40 
41 #include "dispatch/dispatch_reduce.cuh"
42 #include "dispatch/dispatch_reduce_by_key.cuh"
43 #include "../util_namespace.cuh"
44 
46 CUB_NS_PREFIX
47 
49 namespace cub {
50 
51 
84 {
136  template <
137  typename InputIteratorT,
138  typename OutputIteratorT,
139  typename ReductionOpT,
140  typename T>
141  CUB_RUNTIME_FUNCTION
142  static cudaError_t Reduce(
143  void *d_temp_storage,
144  size_t &temp_storage_bytes,
145  InputIteratorT d_in,
146  OutputIteratorT d_out,
147  int num_items,
148  ReductionOpT reduction_op,
149  T init,
150  cudaStream_t stream = 0,
151  bool debug_synchronous = false)
152  {
153  typedef int OffsetT; // Signed integer type for global offsets
154 
155  return DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT>::Dispatch(
156  d_temp_storage,
157  temp_storage_bytes,
158  d_in,
159  d_out,
160  num_items,
161  reduction_op,
162  init,
163  stream,
164  debug_synchronous);
165  }
166 
167 
213  template <
214  typename InputIteratorT,
215  typename OutputIteratorT>
216  CUB_RUNTIME_FUNCTION
217  static cudaError_t Sum(
218  void *d_temp_storage,
219  size_t &temp_storage_bytes,
220  InputIteratorT d_in,
221  OutputIteratorT d_out,
222  int num_items,
223  cudaStream_t stream = 0,
224  bool debug_synchronous = false)
225  {
226  typedef int OffsetT; // Signed integer type for global offsets
227  typedef typename std::iterator_traits<InputIteratorT>::value_type T; // Data element type
228 
229  return DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, cub::Sum>::Dispatch(
230  d_temp_storage,
231  temp_storage_bytes,
232  d_in,
233  d_out,
234  num_items,
235  cub::Sum(),
236  T(), // zero-initialize
237  stream,
238  debug_synchronous);
239  }
240 
241 
280  template <
281  typename InputIteratorT,
282  typename OutputIteratorT>
283  CUB_RUNTIME_FUNCTION
284  static cudaError_t Min(
285  void *d_temp_storage,
286  size_t &temp_storage_bytes,
287  InputIteratorT d_in,
288  OutputIteratorT d_out,
289  int num_items,
290  cudaStream_t stream = 0,
291  bool debug_synchronous = false)
292  {
293  typedef int OffsetT; // Signed integer type for global offsets
294  typedef typename std::iterator_traits<InputIteratorT>::value_type T; // Data element type
295 
296  return DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, cub::Min>::Dispatch(
297  d_temp_storage,
298  temp_storage_bytes,
299  d_in,
300  d_out,
301  num_items,
302  cub::Min(),
303  Traits<T>::Max(), // replace with std::numeric_limits<T>::max() when C++11 support is more prevalent
304  stream,
305  debug_synchronous);
306  }
307 
308 
349  template <
350  typename InputIteratorT,
351  typename OutputIteratorT>
352  CUB_RUNTIME_FUNCTION
353  static cudaError_t ArgMin(
354  void *d_temp_storage,
355  size_t &temp_storage_bytes,
356  InputIteratorT d_in,
357  OutputIteratorT d_out,
358  int num_items,
359  cudaStream_t stream = 0,
360  bool debug_synchronous = false)
361  {
362  typedef int OffsetT; // Signed integer type for global offsets
363  typedef typename std::iterator_traits<InputIteratorT>::value_type T; // Data element type
364  typedef ArgIndexInputIterator<InputIteratorT, int> ArgIndexInputIteratorT; // Wrapped input iterator type
365 
366  ArgIndexInputIteratorT d_argmin_in(d_in);
367  KeyValuePair<OffsetT, T> init = {1, Traits<T>::Max()}; // replace with std::numeric_limits<T>::max() when C++11 support is more prevalent
368 
369  return DispatchReduce<ArgIndexInputIteratorT, OutputIteratorT, OffsetT, cub::ArgMin>::Dispatch(
370  d_temp_storage,
371  temp_storage_bytes,
372  d_argmin_in,
373  d_out,
374  num_items,
375  cub::ArgMin(),
376  init,
377  stream,
378  debug_synchronous);
379  }
380 
381 
420  template <
421  typename InputIteratorT,
422  typename OutputIteratorT>
423  CUB_RUNTIME_FUNCTION
424  static cudaError_t Max(
425  void *d_temp_storage,
426  size_t &temp_storage_bytes,
427  InputIteratorT d_in,
428  OutputIteratorT d_out,
429  int num_items,
430  cudaStream_t stream = 0,
431  bool debug_synchronous = false)
432  {
433  typedef int OffsetT; // Signed integer type for global offsets
434  typedef typename std::iterator_traits<InputIteratorT>::value_type T; // Data element type
435 
436  return DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, cub::Max>::Dispatch(
437  d_temp_storage,
438  temp_storage_bytes,
439  d_in,
440  d_out,
441  num_items,
442  cub::Max(),
443  Traits<T>::Lowest(), // replace with std::numeric_limits<T>::lowest() when C++11 support is more prevalent
444  stream,
445  debug_synchronous);
446  }
447 
448 
489  template <
490  typename InputIteratorT,
491  typename OutputIteratorT>
492  CUB_RUNTIME_FUNCTION
493  static cudaError_t ArgMax(
494  void *d_temp_storage,
495  size_t &temp_storage_bytes,
496  InputIteratorT d_in,
497  OutputIteratorT d_out,
498  int num_items,
499  cudaStream_t stream = 0,
500  bool debug_synchronous = false)
501  {
502  typedef int OffsetT; // Signed integer type for global offsets
503  typedef typename std::iterator_traits<InputIteratorT>::value_type T; // Data element type
504  typedef ArgIndexInputIterator<InputIteratorT, int> ArgIndexInputIteratorT; // Wrapped input iterator
505 
506  ArgIndexInputIteratorT d_argmax_in(d_in);
507  KeyValuePair<OffsetT, T> init = {1, Traits<T>::Lowest()}; // replace with std::numeric_limits<T>::lowest() when C++11 support is more prevalent
508 
509  return DispatchReduce<ArgIndexInputIteratorT, OutputIteratorT, OffsetT, cub::ArgMax>::Dispatch(
510  d_temp_storage,
511  temp_storage_bytes,
512  d_argmax_in,
513  d_out,
514  num_items,
515  cub::ArgMax(),
516  init,
517  stream,
518  debug_synchronous);
519  }
520 
521 
603  template <
604  typename KeysInputIteratorT,
605  typename UniqueOutputIteratorT,
606  typename ValuesInputIteratorT,
607  typename AggregatesOutputIteratorT,
608  typename NumRunsOutputIteratorT,
609  typename ReductionOpT>
610  CUB_RUNTIME_FUNCTION __forceinline__
611  static cudaError_t ReduceByKey(
612  void *d_temp_storage,
613  size_t &temp_storage_bytes,
614  KeysInputIteratorT d_keys_in,
615  UniqueOutputIteratorT d_unique_out,
616  ValuesInputIteratorT d_values_in,
617  AggregatesOutputIteratorT d_aggregates_out,
618  NumRunsOutputIteratorT d_num_runs_out,
619  ReductionOpT reduction_op,
620  int num_items,
621  cudaStream_t stream = 0,
622  bool debug_synchronous = false)
623  {
624  typedef int OffsetT; // Signed integer type for global offsets
625  typedef NullType* FlagIterator; // FlagT iterator type (not used)
626  typedef NullType SelectOp; // Selection op (not used)
627  typedef Equality EqualityOp; // Default == operator
628 
629  return DispatchReduceByKey<KeysInputIteratorT, UniqueOutputIteratorT, ValuesInputIteratorT, AggregatesOutputIteratorT, NumRunsOutputIteratorT, EqualityOp, ReductionOpT, OffsetT>::Dispatch(
630  d_temp_storage,
631  temp_storage_bytes,
632  d_keys_in,
633  d_unique_out,
634  d_values_in,
635  d_aggregates_out,
636  d_num_runs_out,
637  EqualityOp(),
638  reduction_op,
639  num_items,
640  stream,
641  debug_synchronous);
642  }
643 
644 };
645 
650 } // CUB namespace
651 CUB_NS_POSTFIX // Optional outer namespace(s)
652 
653