DAW JSON Link
daw_json_parse_unsigned_int.h
Go to the documentation of this file.
1 // Copyright (c) Darrell Wright
2 //
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5 //
6 // Official repository: https://github.com/beached/daw_json_link
7 //
8 
9 #pragma once
10 
11 #include "daw_json_assert.h"
12 #include "daw_json_parse_digit.h"
13 #include "version.h"
14 
15 #include <daw/daw_arith_traits.h>
16 #include <daw/daw_cxmath.h>
17 #include <daw/daw_uint_buffer.h>
18 
19 #include <ciso646>
20 #include <cstddef>
21 #include <cstdint>
22 #include <utility>
23 
24 #ifdef DAW_ALLOW_SSE42
25 #include <emmintrin.h>
26 #include <smmintrin.h>
27 #include <tmmintrin.h>
28 #include <xmmintrin.h>
29 #ifdef _MSC_VER
30 #include <intrin.h>
31 #endif
32 #endif
33 
34 namespace daw::json {
35  inline namespace DAW_JSON_VER {
36  namespace json_details {
37  [[nodiscard]] static inline constexpr bool
38  is_made_of_eight_digits_cx( const char *ptr ) {
39  // The copy to local buffer is to get the compiler to treat it like a
40  // reinterpret_cast
41 
42  std::byte const buff[8]{
43  static_cast<std::byte>( ptr[0] ), static_cast<std::byte>( ptr[1] ),
44  static_cast<std::byte>( ptr[2] ), static_cast<std::byte>( ptr[3] ),
45  static_cast<std::byte>( ptr[4] ), static_cast<std::byte>( ptr[5] ),
46  static_cast<std::byte>( ptr[6] ), static_cast<std::byte>( ptr[7] ) };
47 
48  UInt64 val = UInt64( );
49  for( std::size_t n = 0; n < 8; ++n ) {
50  val |= to_uint64( buff[n] ) << ( 8 * n );
51  }
52  return ( ( ( val & 0xF0F0'F0F0'F0F0'F0F0_u64 ) |
53  ( ( ( val + 0x0606'0606'0606'0606_u64 ) &
54  0xF0F0'F0F0'F0F0'F0F0_u64 ) >>
55  4U ) ) == 0x3333'3333'3333'3333_u64 );
56  }
57 
58  template<JsonRangeCheck RangeCheck, typename Unsigned,
59  typename MaxArithUnsigned>
60  using max_unsigned_t = std::conditional_t<
61  std::disjunction<daw::is_integral<Unsigned>,
62  std::is_enum<Unsigned>>::value,
63  std::conditional_t<( sizeof( Unsigned ) > sizeof( MaxArithUnsigned ) ),
64  Unsigned, MaxArithUnsigned>,
65  Unsigned>;
66 
67  // Constexpr'ified version from
68  // https://kholdstare.github.io/technical/2020/05/26/faster-integer-parsing.html
69  inline constexpr UInt64 parse_8_digits( const char *const str ) {
70  auto const chunk = daw::to_uint64_buffer( str );
71  // 1-byte mask trick (works on 4 pairs of single digits)
72  auto const lower_digits =
73  ( chunk & 0x0F'00'0F'00'0F'00'0F'00_u64 ) >> 8U;
74  auto const upper_digits =
75  ( chunk & 0x00'0F'00'0F'00'0F'00'0F_u64 ) * 10U;
76  auto const chunk2 = lower_digits + upper_digits;
77 
78  // 2-byte mask trick (works on 2 pairs of two digits)
79  auto const lower_digits2 =
80  ( chunk2 & 0x00'FF'00'00'00'FF'00'00_u64 ) >> 16U;
81  auto const upper_digits2 =
82  ( chunk2 & 0x00'00'00'FF'00'00'00'FF_u64 ) * 100U;
83  auto const chunk3 = lower_digits2 + upper_digits2;
84 
85  // 4-byte mask trick (works on pair of four digits)
86  auto const lower_digits3 =
87  ( chunk3 & 0x00'00'FF'FF'00'00'00'00_u64 ) >> 32U;
88  auto const upper_digits3 =
89  ( chunk3 & 0x00'00'00'00'00'00'FF'FF_u64 ) * 10000U;
90  auto const chunk4 = lower_digits3 + upper_digits3;
91 
92  return chunk4 & 0xFFFF'FFFF_u64;
93  }
94  static_assert( parse_8_digits( "12345678" ) == 1234'5678_u64,
95  "8 digit parser does not work on this platform" );
96  inline constexpr UInt64 parse_16_digits( const char *const str ) {
97  auto const upper = parse_8_digits( str );
98  auto const lower = parse_8_digits( str + 8 );
99  return upper * 100'000'000_u64 + lower;
100  }
101  static_assert( parse_16_digits( "1234567890123456" ) ==
102  1234567890123456_u64,
103  "16 digit parser does not work on this platform" );
104 
105  template<typename Unsigned, JsonRangeCheck RangeChecked, bool KnownBounds,
106  typename ParseState,
107  std::enable_if_t<KnownBounds, std::nullptr_t> = nullptr>
108  [[nodiscard]] static constexpr Unsigned
110  using CharT = typename ParseState::CharT;
111  // We know how many digits are in the number
113  static_assert(
114  not static_cast<bool>( RangeChecked ) or
115  std::is_same<result_t, UInt64>::value,
116  "Range checking is only supported for std integral types" );
117 
118  CharT *first = parse_state.first;
119  CharT *const last = parse_state.last;
120  result_t result = result_t( );
121 
122  while( last - first >= 16 ) {
123  result *= static_cast<result_t>( 10'000'000'000'000'000ULL );
124  result += static_cast<result_t>( parse_16_digits( first ) );
125  first += 16;
126  }
127  if( last - first >= 8 ) {
128  result *= static_cast<result_t>( 100'000'000ULL );
129  result += static_cast<result_t>( parse_8_digits( first ) );
130  first += 8;
131  }
132  if constexpr( ParseState::is_zero_terminated_string ) {
133  auto dig = parse_digit( *first );
134  while( dig < 10U ) {
135  result *= 10U;
136  result += dig;
137  ++first;
138  dig = parse_digit( *first );
139  }
140  } else {
141  while( first < last ) {
142  result *= 10U;
143  result += parse_digit( *first );
144  ++first;
145  }
146  }
147  if constexpr( RangeChecked != JsonRangeCheck::Never ) {
148  auto const count = ( daw::numeric_limits<result_t>::digits10 + 1U ) -
149  std::size( parse_state );
151  ( ( result <= daw::numeric_limits<result_t>::max( ) ) &
152  ( count >= 0 ) ),
153  ErrorReason::NumberOutOfRange, parse_state );
154  }
155  parse_state.first = first;
156  if constexpr( RangeChecked == JsonRangeCheck::Never ) {
157  return daw::construct_a<Unsigned>( static_cast<Unsigned>( result ) );
158  } else {
159  return daw::construct_a<Unsigned>(
160  daw::narrow_cast<Unsigned>( result ) );
161  }
162  }
163 
164  //**************************
165  template<typename Unsigned, JsonRangeCheck RangeChecked, bool KnownBounds,
166  typename ParseState,
167  std::enable_if_t<not KnownBounds, std::nullptr_t> = nullptr>
168  [[nodiscard]] static constexpr Unsigned
170  using CharT = typename ParseState::CharT;
171  // We do not know how long the string is
172  using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
173  static_assert(
174  not static_cast<bool>( RangeChecked ) or
175  std::is_same<result_t, UInt64>::value,
176  "Range checking is only supported for std integral types" );
177  daw_json_assert_weak( parse_state.has_more( ),
178  ErrorReason::UnexpectedEndOfData, parse_state );
179  CharT *first = parse_state.first;
180  CharT *const orig_first = first;
181  (void)orig_first; // only used inside if constexpr and gcc9 warns
182  CharT *const last = parse_state.last;
183  result_t result = result_t( );
184  bool has_eight =
185  last - first >= 8 ? is_made_of_eight_digits_cx( first ) : false;
186  if( has_eight & ( last - first >= 16 ) ) {
187  bool has_sixteen = is_made_of_eight_digits_cx( first + 8 );
188  while( has_sixteen ) {
189  result *= static_cast<result_t>( 10'000'000'000'000'000ULL );
190  result += static_cast<result_t>( parse_16_digits( first ) );
191  first += 16;
192  has_eight =
193  last - first >= 8 ? is_made_of_eight_digits_cx( first ) : false;
194  has_sixteen =
195  has_eight and
196  ( last - first >= 16 ? is_made_of_eight_digits_cx( first + 8 )
197  : false );
198  }
199  }
200  if( has_eight ) {
201  result *= static_cast<result_t>( 100'000'000ULL );
202  result += static_cast<result_t>( parse_8_digits( first ) );
203  first += 8;
204  }
205  if constexpr( ParseState::is_zero_terminated_string ) {
206  auto dig = parse_digit( *first );
207  while( dig < 10U ) {
208  result *= 10U;
209  result += dig;
210  ++first;
211  dig = parse_digit( *first );
212  }
213  } else {
214  auto dig = parse_digit( *first );
215  while( first < last and dig < 10U ) {
216  result *= 10U;
217  result += dig;
218  ++first;
219  dig = parse_digit( *first );
220  }
221  }
222 
223  if constexpr( RangeChecked != JsonRangeCheck::Never ) {
224  auto const count = static_cast<std::ptrdiff_t>(
225  daw::numeric_limits<Unsigned>::digits10 + 1 ) -
226  ( first - orig_first );
227  daw_json_assert( count >= 0, ErrorReason::NumberOutOfRange,
228  parse_state );
229  }
230 
231  parse_state.first = first;
232  if constexpr( RangeChecked == JsonRangeCheck::Never ) {
233  return daw::construct_a<Unsigned>( static_cast<Unsigned>( result ) );
234  } else {
235  return daw::construct_a<Unsigned>(
236  daw::narrow_cast<Unsigned>( result ) );
237  }
238  }
239 
240 #ifdef DAW_ALLOW_SSE42
241  /*
242  // Adapted from
243  //
244  //
245  https://github.com/lemire/simdjson/blob/102262c7abe64b517a36a6049b39d95f58bf4aea/src/haswell/numberparsing.h
246  static inline UInt64 parse_eight_digits_unrolled( const char *ptr ) {
247  // this actually computes *16* values so we are being wasteful.
248  static __m128i const ascii0 = _mm_set1_epi8( '0' );
249 
250  static __m128i const mul_1_10 =
251  _mm_setr_epi8( 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1
252  );
253 
254  static __m128i const mul_1_100 =
255  _mm_setr_epi16( 100, 1, 100, 1, 100, 1, 100, 1 );
256 
257  static __m128i const mul_1_10000 =
258  _mm_setr_epi16( 10000, 1, 10000, 1, 10000, 1, 10000, 1 );
259 
260  __m128i const input = _mm_sub_epi8(
261  _mm_loadu_si128( reinterpret_cast<__m128i const *>( ptr ) ), ascii0 );
262  __m128i const t1 = _mm_maddubs_epi16( input, mul_1_10 );
263  __m128i const t2 = _mm_madd_epi16( t1, mul_1_100 );
264  __m128i const t3 = _mm_packus_epi32( t2, t2 );
265  __m128i const t4 = _mm_madd_epi16( t3, mul_1_10000 );
266  return to_uint64( static_cast<std::uint32_t>( _mm_cvtsi128_si32(
267  t4 ) ) ); // only captures the sum of the first 8 digits, drop the
268  rest
269  }
270 
271  static inline UInt64 parse_sixteen_digits_unrolled( const char *ptr ) {
272  static __m128i const ascii0 = _mm_set1_epi8( '0' );
273 
274  static __m128i const mul_1_10 =
275  _mm_setr_epi8( 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1
276  );
277 
278  static __m128i const mul_1_100 =
279  _mm_setr_epi16( 100, 1, 100, 1, 100, 1, 100, 1 );
280 
281  static __m128i const mul_1_10000 =
282  _mm_setr_epi16( 10000, 1, 10000, 1, 10000, 1, 10000, 1 );
283 
284  __m128i const input = _mm_sub_epi8(
285  _mm_loadu_si128( reinterpret_cast<__m128i const *>( ptr ) ), ascii0 );
286  __m128i const t1 = _mm_maddubs_epi16( input, mul_1_10 );
287  __m128i const t2 = _mm_madd_epi16( t1, mul_1_100 );
288  __m128i const t3 = _mm_packus_epi32( t2, t2 );
289  __m128i const t4 = _mm_madd_epi16( t3, mul_1_10000 );
290  return to_uint64( _mm_cvtsi128_si64( t4 ) );
291  }
292 
293  [[nodiscard]] static inline bool
294  is_made_of_eight_digits_fast( const char *ptr ) {
295  UInt64 val;
296  memcpy( &val, ptr, sizeof( std::uint64_t ) );
297  return ( ( ( val & 0xF0F0F0F0F0F0F0F0_u64 ) |
298  ( ( ( val + 0x0606060606060606_u64 ) & 0xF0F0F0F0F0F0F0F0_u64
299  )
300  >> 4_u64 ) ) == 0x3333333333333333_u64 );
301  }
302 
303  template<typename Unsigned, JsonRangeCheck RangeChecked, bool, typename
304  ParseState>
305  [[nodiscard]] static inline Unsigned
306  unsigned_parser( sse42_exec_tag , ParseState &parse_state ) {
307  daw_json_assert_weak( parse_state.has_more( ),
308  ErrorRange::UnexpectedEndOfData, parse_state
309  ); using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
310  result_t result = result_t( ); CharT *first = parse_state.first; CharT
311  *const last = parse_state.last; CharT *const orig_first =
312  first;
313  {
314  auto sz = last - first;
315  while( ( sz >= 8 ) & is_made_of_eight_digits_fast( first ) ) {
316  if( ( sz < 16 ) | ( not is_made_of_eight_digits_fast( first + 8 ) )
317  ) { result *= 100'000'000_u64; result += static_cast<result_t>(
318  parse_eight_digits_unrolled( first ) ); first += 8; break;
319  }
320  result *= 10'000'000'000'000'000_u64;
321  result +=
322  static_cast<result_t>( parse_sixteen_digits_unrolled( first ) );
323  sz -= 16;
324  first += 16;
325  }
326  }
327 
328  auto dig = parse_digit( *first );
329  while( dig < 10U ) {
330  result *= 10U;
331  result += dig;
332  ++first;
333  dig = parse_digit( *first );
334  }
335  if constexpr( RangeChecked != JsonRangeCheck::Never ) {
336  auto const count =
337  static_cast<intmax_t>( daw::numeric_limits<Unsigned>::digits10 + 1 )
338  - ( first - orig_first ); daw_json_assert( (count >= 0) & (result <=
339  static_cast<result_t>( daw::numeric_limits<Unsigned>::max( ) )),
340  ErrorReason::NumberOutOfRange,
341  parse_state
342  );
343  }
344  parse_state.first = first;
345  if constexpr( RangeChecked == JsonRangeCheck::Never ) {
346  return daw::construct_a<Unsigned>( static_cast<Unsigned>( result ) );
347  } else {
348  return daw::construct_a<Unsigned>( daw::narrow_cast<Unsigned>( result
349  ) );
350  }
351  }
352  */
353 #endif
354  } // namespace json_details
355  } // namespace DAW_JSON_VER
356 } // namespace daw::json
#define daw_json_assert_weak(Bool,...)
Definition: daw_json_assert.h:189
#define daw_json_assert(Bool,...)
Definition: daw_json_assert.h:178
ParseState & parse_state
Definition: daw_json_parse_class.h:201
static constexpr Unsigned unsigned_parser(constexpr_exec_tag, ParseState &parse_state)
Definition: daw_json_parse_unsigned_int.h:109
constexpr UInt64 parse_8_digits(const char *const str)
Definition: daw_json_parse_unsigned_int.h:69
std::conditional_t< std::disjunction< daw::is_integral< Unsigned >, std::is_enum< Unsigned > >::value, std::conditional_t<(sizeof(Unsigned) > sizeof(MaxArithUnsigned)), Unsigned, MaxArithUnsigned >, Unsigned > max_unsigned_t
Definition: daw_json_parse_unsigned_int.h:65
constexpr UInt64 parse_16_digits(const char *const str)
Definition: daw_json_parse_unsigned_int.h:96
static constexpr bool is_made_of_eight_digits_cx(const char *ptr)
Definition: daw_json_parse_unsigned_int.h:38
static constexpr DAW_ATTRIB_FLATINLINE unsigned parse_digit(char c)
Definition: daw_json_parse_digit.h:19
JsonRangeCheck
Definition: daw_json_type_options.h:52
Definition: daw_from_json.h:22
Definition: daw_json_exec_modes.h:19
#define DAW_JSON_VER
Definition: version.h:11