CUB
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups
device_histogram.cuh
Go to the documentation of this file.
1 
2 /******************************************************************************
3  * Copyright (c) 2011, Duane Merrill. All rights reserved.
4  * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
35 #pragma once
36 
37 #include <stdio.h>
38 #include <iterator>
39 #include <limits>
40 
41 #include "dispatch/dispatch_histogram.cuh"
42 #include "../util_namespace.cuh"
43 
45 CUB_NS_PREFIX
46 
48 namespace cub {
49 
50 
64 {
65  /******************************************************************/
69 
118  template <
119  typename SampleIteratorT,
120  typename CounterT,
121  typename LevelT,
122  typename OffsetT>
123  CUB_RUNTIME_FUNCTION
124  static cudaError_t HistogramEven(
125  void* d_temp_storage,
126  size_t& temp_storage_bytes,
127  SampleIteratorT d_samples,
128  CounterT* d_histogram,
129  int num_levels,
130  LevelT lower_level,
131  LevelT upper_level,
132  OffsetT num_samples,
133  cudaStream_t stream = 0,
134  bool debug_synchronous = false)
135  {
137  typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
138 
139  CounterT* d_histogram1[1] = {d_histogram};
140  int num_levels1[1] = {num_levels};
141  LevelT lower_level1[1] = {lower_level};
142  LevelT upper_level1[1] = {upper_level};
143 
144  return MultiHistogramEven<1, 1>(
145  d_temp_storage,
146  temp_storage_bytes,
147  d_samples,
148  d_histogram1,
149  num_levels1,
150  lower_level1,
151  upper_level1,
152  num_samples,
153  1,
154  sizeof(SampleT) * num_samples,
155  stream,
156  debug_synchronous);
157  }
158 
159 
217  template <
218  typename SampleIteratorT,
219  typename CounterT,
220  typename LevelT,
221  typename OffsetT>
222  CUB_RUNTIME_FUNCTION
223  static cudaError_t HistogramEven(
224  void* d_temp_storage,
225  size_t& temp_storage_bytes,
226  SampleIteratorT d_samples,
227  CounterT* d_histogram,
228  int num_levels,
229  LevelT lower_level,
230  LevelT upper_level,
231  OffsetT num_row_samples,
232  OffsetT num_rows,
233  size_t row_stride_bytes,
234  cudaStream_t stream = 0,
235  bool debug_synchronous = false)
236  {
237  CounterT* d_histogram1[1] = {d_histogram};
238  int num_levels1[1] = {num_levels};
239  LevelT lower_level1[1] = {lower_level};
240  LevelT upper_level1[1] = {upper_level};
241 
242  return MultiHistogramEven<1, 1>(
243  d_temp_storage,
244  temp_storage_bytes,
245  d_samples,
246  d_histogram1,
247  num_levels1,
248  lower_level1,
249  upper_level1,
250  num_row_samples,
251  num_rows,
252  row_stride_bytes,
253  stream,
254  debug_synchronous);
255  }
256 
317  template <
318  int NUM_CHANNELS,
319  int NUM_ACTIVE_CHANNELS,
320  typename SampleIteratorT,
321  typename CounterT,
322  typename LevelT,
323  typename OffsetT>
324  CUB_RUNTIME_FUNCTION
325  static cudaError_t MultiHistogramEven(
326  void* d_temp_storage,
327  size_t& temp_storage_bytes,
328  SampleIteratorT d_samples,
329  CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
330  int num_levels[NUM_ACTIVE_CHANNELS],
331  LevelT lower_level[NUM_ACTIVE_CHANNELS],
332  LevelT upper_level[NUM_ACTIVE_CHANNELS],
333  OffsetT num_pixels,
334  cudaStream_t stream = 0,
335  bool debug_synchronous = false)
336  {
338  typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
339 
340  return MultiHistogramEven<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
341  d_temp_storage,
342  temp_storage_bytes,
343  d_samples,
344  d_histogram,
345  num_levels,
346  lower_level,
347  upper_level,
348  num_pixels,
349  1,
350  sizeof(SampleT) * NUM_CHANNELS * num_pixels,
351  stream,
352  debug_synchronous);
353  }
354 
355 
424  template <
425  int NUM_CHANNELS,
426  int NUM_ACTIVE_CHANNELS,
427  typename SampleIteratorT,
428  typename CounterT,
429  typename LevelT,
430  typename OffsetT>
431  CUB_RUNTIME_FUNCTION
432  static cudaError_t MultiHistogramEven(
433  void* d_temp_storage,
434  size_t& temp_storage_bytes,
435  SampleIteratorT d_samples,
436  CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
437  int num_levels[NUM_ACTIVE_CHANNELS],
438  LevelT lower_level[NUM_ACTIVE_CHANNELS],
439  LevelT upper_level[NUM_ACTIVE_CHANNELS],
440  OffsetT num_row_pixels,
441  OffsetT num_rows,
442  size_t row_stride_bytes,
443  cudaStream_t stream = 0,
444  bool debug_synchronous = false)
445  {
447  typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
448  Int2Type<sizeof(SampleT) == 1> is_byte_sample;
449 
450  if ((sizeof(OffsetT) > sizeof(int)) && (row_stride_bytes * num_rows < std::numeric_limits<int>::max()))
451  {
452  // Down-convert OffsetT data type
453 
454 
455  return DipatchHistogram<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, int>::DispatchEven(
456  d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level,
457  (int) num_row_pixels, (int) num_rows, (int) (row_stride_bytes / sizeof(SampleT)),
458  stream, debug_synchronous, is_byte_sample);
459  }
460 
461  return DipatchHistogram<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, OffsetT>::DispatchEven(
462  d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level,
463  num_row_pixels, num_rows, (OffsetT) (row_stride_bytes / sizeof(SampleT)),
464  stream, debug_synchronous, is_byte_sample);
465  }
466 
467 
469  /******************************************************************/
473 
521  template <
522  typename SampleIteratorT,
523  typename CounterT,
524  typename LevelT,
525  typename OffsetT>
526  CUB_RUNTIME_FUNCTION
527  static cudaError_t HistogramRange(
528  void* d_temp_storage,
529  size_t& temp_storage_bytes,
530  SampleIteratorT d_samples,
531  CounterT* d_histogram,
532  int num_levels,
533  LevelT* d_levels,
534  OffsetT num_samples,
535  cudaStream_t stream = 0,
536  bool debug_synchronous = false)
537  {
539  typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
540 
541  CounterT* d_histogram1[1] = {d_histogram};
542  int num_levels1[1] = {num_levels};
543  LevelT* d_levels1[1] = {d_levels};
544 
545  return MultiHistogramRange<1, 1>(
546  d_temp_storage,
547  temp_storage_bytes,
548  d_samples,
549  d_histogram1,
550  num_levels1,
551  d_levels1,
552  num_samples,
553  1,
554  sizeof(SampleT) * num_samples,
555  stream,
556  debug_synchronous);
557  }
558 
559 
616  template <
617  typename SampleIteratorT,
618  typename CounterT,
619  typename LevelT,
620  typename OffsetT>
621  CUB_RUNTIME_FUNCTION
622  static cudaError_t HistogramRange(
623  void* d_temp_storage,
624  size_t& temp_storage_bytes,
625  SampleIteratorT d_samples,
626  CounterT* d_histogram,
627  int num_levels,
628  LevelT* d_levels,
629  OffsetT num_row_samples,
630  OffsetT num_rows,
631  size_t row_stride_bytes,
632  cudaStream_t stream = 0,
633  bool debug_synchronous = false)
634  {
635  CounterT* d_histogram1[1] = {d_histogram};
636  int num_levels1[1] = {num_levels};
637  LevelT* d_levels1[1] = {d_levels};
638 
639  return MultiHistogramRange<1, 1>(
640  d_temp_storage,
641  temp_storage_bytes,
642  d_samples,
643  d_histogram1,
644  num_levels1,
645  d_levels1,
646  num_row_samples,
647  num_rows,
648  row_stride_bytes,
649  stream,
650  debug_synchronous);
651  }
652 
713  template <
714  int NUM_CHANNELS,
715  int NUM_ACTIVE_CHANNELS,
716  typename SampleIteratorT,
717  typename CounterT,
718  typename LevelT,
719  typename OffsetT>
720  CUB_RUNTIME_FUNCTION
721  static cudaError_t MultiHistogramRange(
722  void* d_temp_storage,
723  size_t& temp_storage_bytes,
724  SampleIteratorT d_samples,
725  CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
726  int num_levels[NUM_ACTIVE_CHANNELS],
727  LevelT* d_levels[NUM_ACTIVE_CHANNELS],
728  OffsetT num_pixels,
729  cudaStream_t stream = 0,
730  bool debug_synchronous = false)
731  {
733  typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
734 
735  return MultiHistogramRange<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
736  d_temp_storage,
737  temp_storage_bytes,
738  d_samples,
739  d_histogram,
740  num_levels,
741  d_levels,
742  num_pixels,
743  1,
744  sizeof(SampleT) * NUM_CHANNELS * num_pixels,
745  stream,
746  debug_synchronous);
747  }
748 
749 
816  template <
817  int NUM_CHANNELS,
818  int NUM_ACTIVE_CHANNELS,
819  typename SampleIteratorT,
820  typename CounterT,
821  typename LevelT,
822  typename OffsetT>
823  CUB_RUNTIME_FUNCTION
824  static cudaError_t MultiHistogramRange(
825  void* d_temp_storage,
826  size_t& temp_storage_bytes,
827  SampleIteratorT d_samples,
828  CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
829  int num_levels[NUM_ACTIVE_CHANNELS],
830  LevelT* d_levels[NUM_ACTIVE_CHANNELS],
831  OffsetT num_row_pixels,
832  OffsetT num_rows,
833  size_t row_stride_bytes,
834  cudaStream_t stream = 0,
835  bool debug_synchronous = false)
836  {
838  typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
839  Int2Type<sizeof(SampleT) == 1> is_byte_sample;
840 
841  if ((sizeof(OffsetT) > sizeof(int)) && (row_stride_bytes * num_rows < std::numeric_limits<int>::max()))
842  {
843  // Down-convert OffsetT data type
844  return DipatchHistogram<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, int>::DispatchRange(
845  d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels,
846  (int) num_row_pixels, (int) num_rows, (int) (row_stride_bytes / sizeof(SampleT)),
847  stream, debug_synchronous, is_byte_sample);
848  }
849 
850  return DipatchHistogram<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, OffsetT>::DispatchRange(
851  d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels,
852  num_row_pixels, num_rows, (OffsetT) (row_stride_bytes / sizeof(SampleT)),
853  stream, debug_synchronous, is_byte_sample);
854  }
855 
856 
857 
859 };
860 
865 } // CUB namespace
866 CUB_NS_POSTFIX // Optional outer namespace(s)
867 
868