CUB
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups
device_segmented_reduce.cuh
Go to the documentation of this file.
1 
2 /******************************************************************************
3  * Copyright (c) 2011, Duane Merrill. All rights reserved.
4  * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
35 #pragma once
36 
37 #include <stdio.h>
38 #include <iterator>
39 
40 #include "dispatch/dispatch_reduce.cuh"
41 #include "dispatch/dispatch_reduce_by_key.cuh"
42 #include "../util_type.cuh"
43 #include "../util_namespace.cuh"
44 
46 CUB_NS_PREFIX
47 
49 namespace cub {
50 
51 
65 {
124  template <
125  typename InputIteratorT,
126  typename OutputIteratorT,
127  typename ReductionOp,
128  typename T>
129  CUB_RUNTIME_FUNCTION
130  static cudaError_t Reduce(
131  void *d_temp_storage,
132  size_t &temp_storage_bytes,
133  InputIteratorT d_in,
134  OutputIteratorT d_out,
135  int num_segments,
136  int *d_begin_offsets,
137  int *d_end_offsets,
138  ReductionOp reduction_op,
139  T init,
140  cudaStream_t stream = 0,
141  bool debug_synchronous = false)
142  {
143  // Signed integer type for global offsets
144  typedef int OffsetT;
145 
146  return DispatchSegmentedReduce<InputIteratorT, OutputIteratorT, OffsetT, ReductionOp>::Dispatch(
147  d_temp_storage,
148  temp_storage_bytes,
149  d_in,
150  d_out,
151  num_segments,
152  d_begin_offsets,
153  d_end_offsets,
154  reduction_op,
155  init,
156  stream,
157  debug_synchronous);
158  }
159 
160 
206  template <
207  typename InputIteratorT,
208  typename OutputIteratorT>
209  CUB_RUNTIME_FUNCTION
210  static cudaError_t Sum(
211  void *d_temp_storage,
212  size_t &temp_storage_bytes,
213  InputIteratorT d_in,
214  OutputIteratorT d_out,
215  int num_segments,
216  int *d_begin_offsets,
217  int *d_end_offsets,
218  cudaStream_t stream = 0,
219  bool debug_synchronous = false)
220  {
221  typedef int OffsetT; // Signed integer type for global offsets
222  typedef typename std::iterator_traits<InputIteratorT>::value_type T; // Data element type
223 
224  return DispatchSegmentedReduce<InputIteratorT, OutputIteratorT, OffsetT, cub::Sum>::Dispatch(
225  d_temp_storage,
226  temp_storage_bytes,
227  d_in,
228  d_out,
229  num_segments,
230  d_begin_offsets,
231  d_end_offsets,
232  cub::Sum(),
233  T(), // zero-initialize
234  stream,
235  debug_synchronous);
236  }
237 
238 
284  template <
285  typename InputIteratorT,
286  typename OutputIteratorT>
287  CUB_RUNTIME_FUNCTION
288  static cudaError_t Min(
289  void *d_temp_storage,
290  size_t &temp_storage_bytes,
291  InputIteratorT d_in,
292  OutputIteratorT d_out,
293  int num_segments,
294  int *d_begin_offsets,
295  int *d_end_offsets,
296  cudaStream_t stream = 0,
297  bool debug_synchronous = false)
298  {
299  typedef int OffsetT; // Signed integer type for global offsets
300  typedef typename std::iterator_traits<InputIteratorT>::value_type T; // Data element type
301 
302  return DispatchSegmentedReduce<InputIteratorT, OutputIteratorT, OffsetT, cub::Min>::Dispatch(
303  d_temp_storage,
304  temp_storage_bytes,
305  d_in,
306  d_out,
307  num_segments,
308  d_begin_offsets,
309  d_end_offsets,
310  cub::Min(),
311  Traits<T>::Max(), // replace with std::numeric_limits<T>::max() when C++11 support is more prevalent
312  stream,
313  debug_synchronous);
314  }
315 
316 
364  template <
365  typename InputIteratorT,
366  typename OutputIteratorT>
367  CUB_RUNTIME_FUNCTION
368  static cudaError_t ArgMin(
369  void *d_temp_storage,
370  size_t &temp_storage_bytes,
371  InputIteratorT d_in,
372  OutputIteratorT d_out,
373  int num_segments,
374  int *d_begin_offsets,
375  int *d_end_offsets,
376  cudaStream_t stream = 0,
377  bool debug_synchronous = false)
378  {
379  typedef int OffsetT; // Signed integer type for global offsets
380  typedef typename std::iterator_traits<InputIteratorT>::value_type T; // Data element type
381  typedef ArgIndexInputIterator<InputIteratorT, int> ArgIndexInputIteratorT; // Wrapped input iterator type
382 
383  ArgIndexInputIteratorT d_argmin_in(d_in);
384  KeyValuePair<OffsetT, T> init = {1, Traits<T>::Max()}; // replace with std::numeric_limits<T>::max() when C++11 support is more prevalent
385 
386  return DispatchSegmentedReduce<ArgIndexInputIteratorT, OutputIteratorT, OffsetT, cub::ArgMin>::Dispatch(
387  d_temp_storage,
388  temp_storage_bytes,
389  d_argmin_in,
390  d_out,
391  num_segments,
392  d_begin_offsets,
393  d_end_offsets,
394  cub::ArgMin(),
395  init,
396  stream,
397  debug_synchronous);
398  }
399 
400 
446  template <
447  typename InputIteratorT,
448  typename OutputIteratorT>
449  CUB_RUNTIME_FUNCTION
450  static cudaError_t Max(
451  void *d_temp_storage,
452  size_t &temp_storage_bytes,
453  InputIteratorT d_in,
454  OutputIteratorT d_out,
455  int num_segments,
456  int *d_begin_offsets,
457  int *d_end_offsets,
458  cudaStream_t stream = 0,
459  bool debug_synchronous = false)
460  {
461  typedef int OffsetT; // Signed integer type for global offsets
462  typedef typename std::iterator_traits<InputIteratorT>::value_type T; // Data element type
463 
464  return DispatchSegmentedReduce<InputIteratorT, OutputIteratorT, OffsetT, cub::Max>::Dispatch(
465  d_temp_storage,
466  temp_storage_bytes,
467  d_in,
468  d_out,
469  num_segments,
470  d_begin_offsets,
471  d_end_offsets,
472  cub::Max(),
473  Traits<T>::Lowest(), // replace with std::numeric_limits<T>::lowest() when C++11 support is more prevalent
474  stream,
475  debug_synchronous);
476  }
477 
478 
526  template <
527  typename InputIteratorT,
528  typename OutputIteratorT>
529  CUB_RUNTIME_FUNCTION
530  static cudaError_t ArgMax(
531  void *d_temp_storage,
532  size_t &temp_storage_bytes,
533  InputIteratorT d_in,
534  OutputIteratorT d_out,
535  int num_segments,
536  int *d_begin_offsets,
537  int *d_end_offsets,
538  cudaStream_t stream = 0,
539  bool debug_synchronous = false)
540  {
541  typedef int OffsetT; // Signed integer type for global offsets
542  typedef typename std::iterator_traits<InputIteratorT>::value_type T; // Data element type
543  typedef ArgIndexInputIterator<InputIteratorT, int> ArgIndexInputIteratorT; // Wrapped input iterator
544 
545  ArgIndexInputIteratorT d_argmax_in(d_in);
546  KeyValuePair<OffsetT, T> init = {1, Traits<T>::Lowest()}; // replace with std::numeric_limits<T>::lowest() when C++11 support is more prevalent
547 
548  return DispatchSegmentedReduce<ArgIndexInputIteratorT, OutputIteratorT, OffsetT, cub::ArgMax>::Dispatch(
549  d_temp_storage,
550  temp_storage_bytes,
551  d_argmax_in,
552  d_out,
553  num_segments,
554  d_begin_offsets,
555  d_end_offsets,
556  cub::ArgMax(),
557  init,
558  stream,
559  debug_synchronous);
560  }
561 
562 };
563 
564 } // CUB namespace
565 CUB_NS_POSTFIX // Optional outer namespace(s)
566 
567