DAW JSON Link
daw_json_parse_unsigned_int.h
Go to the documentation of this file.
1 // Copyright (c) Darrell Wright
2 //
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5 //
6 // Official repository: https://github.com/beached/daw_json_link
7 //
8 
9 #pragma once
10 
11 #include "daw_json_assert.h"
12 #include "daw_json_parse_digit.h"
13 
14 #include <daw/daw_arith_traits.h>
15 #include <daw/daw_cxmath.h>
16 #include <daw/daw_uint_buffer.h>
17 
18 #include <ciso646>
19 #include <cstddef>
20 #include <cstdint>
21 #include <utility>
22 
23 #ifdef DAW_ALLOW_SSE42
24 #include <emmintrin.h>
25 #include <smmintrin.h>
26 #include <tmmintrin.h>
27 #include <xmmintrin.h>
28 #ifdef _MSC_VER
29 #include <intrin.h>
30 #endif
31 #endif
32 
33 namespace daw::json::json_details {
34  [[nodiscard]] static inline constexpr bool
35  is_made_of_eight_digits_cx( const char *ptr ) {
36  // The copy to local buffer is to get the compiler to treat it like a
37  // reinterpret_cast
38  std::byte buff[8]{ };
39  for( std::size_t n = 0; n < 8; ++n ) {
40  buff[n] = static_cast<std::byte>( ptr[n] );
41  }
42  UInt64 val = UInt64( );
43 
44  for( std::size_t n = 0; n < 8; ++n ) {
45  val |= to_uint64( buff[n] ) << ( 8 * n );
46  }
47  return (
48  ( ( val & 0xF0F0'F0F0'F0F0'F0F0_u64 ) |
49  ( ( ( val + 0x0606'0606'0606'0606_u64 ) & 0xF0F0'F0F0'F0F0'F0F0_u64 ) >>
50  4U ) ) == 0x3333'3333'3333'3333_u64 );
51  }
52 
53  template<JsonRangeCheck RangeCheck, typename Unsigned,
54  typename MaxArithUnsigned>
55  using max_unsigned_t = std::conditional_t<
56  (daw::is_integral_v<Unsigned> or std::is_enum_v<Unsigned>),
57  std::conditional_t<( sizeof( Unsigned ) > sizeof( MaxArithUnsigned ) ),
58  Unsigned, MaxArithUnsigned>,
59  Unsigned>;
60 
61  // Constexpr'ified version from
62  // https://kholdstare.github.io/technical/2020/05/26/faster-integer-parsing.html
63  inline constexpr UInt64 parse_8_digits( const char *const str ) {
64  auto const chunk = daw::to_uint64_buffer( str );
65  // 1-byte mask trick (works on 4 pairs of single digits)
66  auto const lower_digits = ( chunk & 0x0f'00'0f'00'0f'00'0f'00_u64 ) >> 8U;
67  auto const upper_digits = ( chunk & 0x00'0f'00'0f'00'0f'00'0f_u64 ) * 10U;
68  auto const chunk2 = lower_digits + upper_digits;
69 
70  // 2-byte mask trick (works on 2 pairs of two digits)
71  auto const lower_digits2 =
72  ( chunk2 & 0x00'ff'00'00'00'ff'00'00_u64 ) >> 16U;
73  auto const upper_digits2 =
74  ( chunk2 & 0x00'00'00'ff'00'00'00'ff_u64 ) * 100U;
75  auto const chunk3 = lower_digits2 + upper_digits2;
76 
77  // 4-byte mask trick (works on pair of four digits)
78  auto const lower_digits3 =
79  ( chunk3 & 0x00'00'ff'ff'00'00'00'00_u64 ) >> 32U;
80  auto const upper_digits3 =
81  ( chunk3 & 0x00'00'00'00'00'00'ff'ff_u64 ) * 10000U;
82  auto const chunk4 = lower_digits3 + upper_digits3;
83 
84  return chunk4 & 0xFFFF'FFFF_u64;
85  }
86  static_assert( parse_8_digits( "12345678" ) == 1234'5678_u64,
87  "8 digit parser does not work on this platform" );
88  inline constexpr UInt64 parse_16_digits( const char *const str ) {
89  auto const upper = parse_8_digits( str );
90  auto const lower = parse_8_digits( str + 8 );
91  return upper * 100'000'000_u64 + lower;
92  }
93  static_assert( parse_16_digits( "1234567890123456" ) == 1234567890123456_u64,
94  "16 digit parser does not work on this platform" );
95 
96  template<typename Unsigned, JsonRangeCheck RangeChecked, bool KnownBounds,
97  typename Range,
98  std::enable_if_t<KnownBounds, std::nullptr_t> = nullptr>
99  [[nodiscard]] static constexpr Unsigned
100  unsigned_parser( constexpr_exec_tag const &, Range &rng ) {
101  // We know how many digits are in the number
102  using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
103  static_assert( not static_cast<bool>( RangeChecked ) or
104  std::is_same_v<result_t, UInt64>,
105  "Range checking is only supported for std integral types" );
106 
107  char const *first = rng.first;
108  char const *const last = rng.last;
109  result_t result = result_t( );
110 
111  while( last - first >= 16 ) {
112  result *= static_cast<result_t>( 10'000'000'000'000'000ULL );
113  result += static_cast<result_t>( parse_16_digits( first ) );
114  first += 16;
115  }
116  if( last - first >= 8 ) {
117  result *= static_cast<result_t>( 100'000'000ULL );
118  result += static_cast<result_t>( parse_8_digits( first ) );
119  first += 8;
120  }
121  while( first < last ) {
122  result *= 10U;
123  result += parse_digit( *first );
124  ++first;
125  }
126  if constexpr( RangeChecked != JsonRangeCheck::Never ) {
127  auto const count =
128  ( daw::numeric_limits<result_t>::digits10 + 1U ) - rng.size( );
129  daw_json_assert( ( ( result <= daw::numeric_limits<result_t>::max( ) ) &
130  ( count >= 0 ) ),
131  ErrorReason::NumberOutOfRange, rng );
132  }
133  rng.first = first;
134  if constexpr( RangeChecked == JsonRangeCheck::Never ) {
135  return daw::construct_a<Unsigned>( static_cast<Unsigned>( result ) );
136  } else {
137  return daw::construct_a<Unsigned>( daw::narrow_cast<Unsigned>( result ) );
138  }
139  }
140 
141  //**************************
142  template<typename Unsigned, JsonRangeCheck RangeChecked, bool KnownBounds,
143  typename Range,
144  std::enable_if_t<not KnownBounds, std::nullptr_t> = nullptr>
145  [[nodiscard]] static constexpr Unsigned
146  unsigned_parser( constexpr_exec_tag const &, Range &rng ) {
147  // We do not know how long the string is
148  using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
149  static_assert( not static_cast<bool>( RangeChecked ) or
150  std::is_same_v<result_t, UInt64>,
151  "Range checking is only supported for std integral types" );
152  daw_json_assert_weak( rng.has_more( ), ErrorReason::UnexpectedEndOfData,
153  rng );
154  char const *first = rng.first;
155  char const *const orig_first = first;
156  char const *const last = rng.last;
157  result_t result = result_t( );
158  bool has_eight =
159  last - first >= 8 ? is_made_of_eight_digits_cx( first ) : false;
160  if( has_eight & ( last - first >= 16 ) ) {
161  bool has_sixteen = is_made_of_eight_digits_cx( first + 8 );
162  while( has_sixteen ) {
163  result *= static_cast<result_t>( 10'000'000'000'000'000ULL );
164  result += static_cast<result_t>( parse_16_digits( first ) );
165  first += 16;
166  has_eight =
167  last - first >= 8 ? is_made_of_eight_digits_cx( first ) : false;
168  has_sixteen =
169  has_eight and
170  ( last - first >= 16 ? is_made_of_eight_digits_cx( first + 8 )
171  : false );
172  }
173  }
174  if( has_eight ) {
175  result *= static_cast<result_t>( 100'000'000ULL );
176  result += static_cast<result_t>( parse_8_digits( first ) );
177  first += 8;
178  }
179  auto dig = parse_digit( *first );
180 
181  while( dig < 10U ) {
182  result *= 10U;
183  result += dig;
184  ++first;
185  dig = parse_digit( *first );
186  }
187 
188  if constexpr( RangeChecked != JsonRangeCheck::Never ) {
189  auto const count = static_cast<std::ptrdiff_t>(
190  daw::numeric_limits<Unsigned>::digits10 + 1 ) -
191  ( first - orig_first );
192  daw_json_assert( count >= 0, ErrorReason::NumberOutOfRange, rng );
193  }
194 
195  rng.first = first;
196  if constexpr( RangeChecked == JsonRangeCheck::Never ) {
197  return daw::construct_a<Unsigned>( static_cast<Unsigned>( result ) );
198  } else {
199  return daw::construct_a<Unsigned>( daw::narrow_cast<Unsigned>( result ) );
200  }
201  }
202 
203 #ifdef DAW_ALLOW_SSE42
204  /*
205  // Adapted from
206  //
207  //
208  https://github.com/lemire/simdjson/blob/102262c7abe64b517a36a6049b39d95f58bf4aea/src/haswell/numberparsing.h
209  static inline UInt64 parse_eight_digits_unrolled( const char *ptr ) {
210  // this actually computes *16* values so we are being wasteful.
211  static __m128i const ascii0 = _mm_set1_epi8( '0' );
212 
213  static __m128i const mul_1_10 =
214  _mm_setr_epi8( 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1 );
215 
216  static __m128i const mul_1_100 =
217  _mm_setr_epi16( 100, 1, 100, 1, 100, 1, 100, 1 );
218 
219  static __m128i const mul_1_10000 =
220  _mm_setr_epi16( 10000, 1, 10000, 1, 10000, 1, 10000, 1 );
221 
222  __m128i const input = _mm_sub_epi8(
223  _mm_loadu_si128( reinterpret_cast<__m128i const *>( ptr ) ), ascii0 );
224  __m128i const t1 = _mm_maddubs_epi16( input, mul_1_10 );
225  __m128i const t2 = _mm_madd_epi16( t1, mul_1_100 );
226  __m128i const t3 = _mm_packus_epi32( t2, t2 );
227  __m128i const t4 = _mm_madd_epi16( t3, mul_1_10000 );
228  return to_uint64( static_cast<std::uint32_t>( _mm_cvtsi128_si32(
229  t4 ) ) ); // only captures the sum of the first 8 digits, drop the rest
230  }
231 
232  static inline UInt64 parse_sixteen_digits_unrolled( const char *ptr ) {
233  static __m128i const ascii0 = _mm_set1_epi8( '0' );
234 
235  static __m128i const mul_1_10 =
236  _mm_setr_epi8( 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1 );
237 
238  static __m128i const mul_1_100 =
239  _mm_setr_epi16( 100, 1, 100, 1, 100, 1, 100, 1 );
240 
241  static __m128i const mul_1_10000 =
242  _mm_setr_epi16( 10000, 1, 10000, 1, 10000, 1, 10000, 1 );
243 
244  __m128i const input = _mm_sub_epi8(
245  _mm_loadu_si128( reinterpret_cast<__m128i const *>( ptr ) ), ascii0 );
246  __m128i const t1 = _mm_maddubs_epi16( input, mul_1_10 );
247  __m128i const t2 = _mm_madd_epi16( t1, mul_1_100 );
248  __m128i const t3 = _mm_packus_epi32( t2, t2 );
249  __m128i const t4 = _mm_madd_epi16( t3, mul_1_10000 );
250  return to_uint64( _mm_cvtsi128_si64( t4 ) );
251  }
252 
253  [[nodiscard]] static inline bool
254  is_made_of_eight_digits_fast( const char *ptr ) {
255  UInt64 val;
256  memcpy( &val, ptr, sizeof( std::uint64_t ) );
257  return ( ( ( val & 0xF0F0F0F0F0F0F0F0_u64 ) |
258  ( ( ( val + 0x0606060606060606_u64 ) & 0xF0F0F0F0F0F0F0F0_u64 )
259  >> 4_u64 ) ) == 0x3333333333333333_u64 );
260  }
261 
262  template<typename Unsigned, JsonRangeCheck RangeChecked, bool, typename Range>
263  [[nodiscard]] static inline Unsigned
264  unsigned_parser( sse42_exec_tag const &, Range &rng ) {
 daw_json_assert_weak( rng.has_more( ), ErrorReason::UnexpectedEndOfData,
                       rng );
 using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
 result_t result = result_t( );
 char const *first = rng.first;
 char const *const last = rng.last;
 char const *const orig_first = first;
269  {
270  auto sz = last - first;
271  while( ( sz >= 8 ) & is_made_of_eight_digits_fast( first ) ) {
272  if( ( sz < 16 ) | ( not is_made_of_eight_digits_fast( first + 8 ) ) ) {
273  result *= 100'000'000_u64;
274  result +=
275  static_cast<result_t>( parse_eight_digits_unrolled( first ) );
276  first += 8;
277  break;
278  }
279  result *= 10'000'000'000'000'000_u64;
280  result +=
281  static_cast<result_t>( parse_sixteen_digits_unrolled( first ) );
282  sz -= 16;
283  first += 16;
284  }
285  }
286 
287  auto dig = parse_digit( *first );
288  while( dig < 10U ) {
289  result *= 10U;
290  result += dig;
291  ++first;
292  dig = parse_digit( *first );
293  }
294  if constexpr( RangeChecked != JsonRangeCheck::Never ) {
295  auto const count =
296  static_cast<intmax_t>( daw::numeric_limits<Unsigned>::digits10 + 1 ) -
297  ( first - orig_first );
298  daw_json_assert( (count >= 0) &
299  (result <= static_cast<result_t>(
300  daw::numeric_limits<Unsigned>::max( ) )),
301  ErrorReason::NumberOutOfRange, rng );
302  }
303  rng.first = first;
304  if constexpr( RangeChecked == JsonRangeCheck::Never ) {
305  return daw::construct_a<Unsigned>( static_cast<Unsigned>( result ) );
306  } else {
307  return daw::construct_a<Unsigned>( daw::narrow_cast<Unsigned>( result ) );
308  }
309  }
310  */
311 #endif
312 } // namespace daw::json::json_details
daw_json_assert
#define daw_json_assert(Bool,...)
Definition: daw_json_assert.h:196
daw_json_parse_digit.h
daw_json_assert.h
daw::json::JsonRangeCheck
JsonRangeCheck
Definition: daw_json_parse_common.h:410