36 #include "specializations/block_histogram_sort.cuh"
37 #include "specializations/block_histogram_atomic.cuh"
38 #include "../util_ptx.cuh"
39 #include "../util_arch.cuh"
40 #include "../util_namespace.cuh"
151 int ITEMS_PER_THREAD,
156 int PTX_ARCH = CUB_PTX_ARCH>
// Total number of threads in the block: the product of the three block dimensions.
169 BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z,
// InternalBlockHistogram names the ::Type picked from two candidate implementations:
// BlockHistogramSort<...> and BlockHistogramAtomic<BINS>.
// NOTE(review): the condition that selects between them is not on these lines -- confirm at the typedef head.
185 BlockHistogramSort<T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH>,
186 BlockHistogramAtomic<BINS> >::Type InternalBlockHistogram;
// Storage type required by whichever implementation was selected above.
189 typedef typename InternalBlockHistogram::TempStorage _TempStorage;
// Reference to the storage this instance works on; it is bound in the constructor initializer lists.
197 _TempStorage &temp_storage;
// Device-only helper that hands back a reference to a _TempStorage object
// declared inside this function.
208 __device__ __forceinline__ _TempStorage& PrivateStorage()
// Declared __shared__, so this is not a per-thread local variable and the
// reference returned below does not dangle when the function returns.
210 __shared__ _TempStorage private_storage;
211 return private_storage;
// Bind temp_storage to the __shared__ object obtained from PrivateStorage().
231 temp_storage(PrivateStorage()),
// linear_tid is whatever RowMajorTid yields for the three compile-time block dimensions.
232 linear_tid(
RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))
// Bind the temp_storage member to the result of Alias() called on the argument.
// NOTE(review): the inner `temp_storage` is presumably this constructor's own
// parameter shadowing the member -- its declaration is not on these lines; confirm.
242 temp_storage(temp_storage.Alias()),
// linear_tid is whatever RowMajorTid yields for the three compile-time block dimensions.
243 linear_tid(
RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))
// Sets histogram bins to zero. Each thread writes the bin at index
// (histo_offset + linear_tid); CounterT is the type of one bin counter.
290 template <
typename CounterT >
// Start of the BLOCK_THREADS-wide window of bins currently being cleared.
294 int histo_offset = 0;
// Full windows: while a whole window still fits within BINS, every thread
// clears exactly one bin per iteration, with no bounds test needed.
297 for(; histo_offset + BLOCK_THREADS <= BINS; histo_offset += BLOCK_THREADS)
299 histogram[histo_offset + linear_tid] = 0;
// Partial last window: exists only when BINS is not a multiple of BLOCK_THREADS.
// Only threads whose bin index is still below BINS write, so no store goes past the end.
302 if ((BINS % BLOCK_THREADS != 0) && (histo_offset + linear_tid < BINS))
304 histogram[histo_offset + linear_tid] = 0;
// items: this thread's ITEMS_PER_THREAD input values, taken by reference to the array.
348 T (&items)[ITEMS_PER_THREAD],
// histogram: the bin counters; written as an array of BINS CounterT, which as a
// function parameter is received as a pointer to the first counter.
349 CounterT histogram[BINS])
// Build a temporary InternalBlockHistogram over temp_storage and let its
// Composite() process `items` against `histogram`.
357 InternalBlockHistogram(temp_storage).Composite(items, histogram);
// items: this thread's ITEMS_PER_THREAD input values, taken by reference to the array.
405 T (&items)[ITEMS_PER_THREAD],
// histogram: the bin counters (BINS entries of CounterT); as a function parameter
// this array form is received as a pointer to the first counter.
406 CounterT histogram[BINS])
// Pure delegation: a temporary InternalBlockHistogram bound to temp_storage
// performs the work through its own Composite(items, histogram).
408 InternalBlockHistogram(temp_storage).Composite(items, histogram);