DAW JSON Link
daw_json_parse_unsigned_int.h
Go to the documentation of this file.
1// Copyright (c) Darrell Wright
2//
3// Distributed under the Boost Software License, Version 1.0. (See accompanying
4// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5//
6// Official repository: https://github.com/beached/daw_json_link
7//
8
9#pragma once
10
11#include "daw_json_assert.h"
13#include "version.h"
14
15#include <daw/daw_arith_traits.h>
16#include <daw/daw_cxmath.h>
17#include <daw/daw_uint_buffer.h>
18
19#include <ciso646>
20#include <cstddef>
21#include <cstdint>
22#include <utility>
23
24#ifdef DAW_ALLOW_SSE42
25#include <emmintrin.h>
26#include <smmintrin.h>
27#include <tmmintrin.h>
28#include <xmmintrin.h>
29#ifdef _MSC_VER
30#include <intrin.h>
31#endif
32#endif
33
34namespace daw::json {
35 inline namespace DAW_JSON_VER {
36 namespace json_details {
37 template<typename Signed, typename Unsigned>
38 constexpr Signed to_signed( Unsigned &&u, Signed sign ) {
39 using unsigned_t = daw::remove_cvref_t<Unsigned>;
40 if constexpr( sizeof( Signed ) >= sizeof( intmax_t ) and
41 daw::is_system_integral_v<unsigned_t> and
42 daw::is_system_integral_v<Signed> and
43 sizeof( Signed ) == sizeof( unsigned_t ) ) {
44 if( DAW_UNLIKELY( u == ( static_cast<unsigned_t>(
45 ( daw::numeric_limits<Signed>::max )( ) ) +
46 1 ) ) ) {
47 // the bits of static_cast<unsigned_t>( limits<Signed>::max( ) ) + 1
48 // are the same as limits<Signed>::min( ). We can just cast
49 return static_cast<Signed>( u );
50 }
51 return static_cast<Signed>( sign * static_cast<Signed>( u ) );
52 } else {
53 return static_cast<Signed>( sign * static_cast<Signed>( u ) );
54 }
55 }
56
57 [[nodiscard]] static inline constexpr bool
58 is_made_of_eight_digits_cx( const char *ptr ) {
59 // The copy to local buffer is to get the compiler to treat it like a
60 // reinterpret_cast
61
62 std::byte const buff[8]{
63 static_cast<std::byte>( ptr[0] ), static_cast<std::byte>( ptr[1] ),
64 static_cast<std::byte>( ptr[2] ), static_cast<std::byte>( ptr[3] ),
65 static_cast<std::byte>( ptr[4] ), static_cast<std::byte>( ptr[5] ),
66 static_cast<std::byte>( ptr[6] ), static_cast<std::byte>( ptr[7] ) };
67
68 UInt64 val = UInt64( );
69 for( std::size_t n = 0; n < 8; ++n ) {
70 val |= to_uint64( buff[n] ) << ( 8 * n );
71 }
72 return ( ( ( val & 0xF0F0'F0F0'F0F0'F0F0_u64 ) |
73 ( ( ( val + 0x0606'0606'0606'0606_u64 ) &
74 0xF0F0'F0F0'F0F0'F0F0_u64 ) >>
75 4U ) ) == 0x3333'3333'3333'3333_u64 );
76 }
77
78 template<JsonRangeCheck RangeCheck, typename Unsigned,
79 typename MaxArithUnsigned>
80 using max_unsigned_t = std::conditional_t<
81 std::disjunction<daw::is_integral<Unsigned>,
82 std::is_enum<Unsigned>>::value,
83 std::conditional_t<( sizeof( Unsigned ) > sizeof( MaxArithUnsigned ) ),
84 Unsigned, MaxArithUnsigned>,
85 Unsigned>;
86
87 // Constexpr'ified version from
88 // https://kholdstare.github.io/technical/2020/05/26/faster-integer-parsing.html
89 inline constexpr UInt64 parse_8_digits( const char *const str ) {
90 auto const chunk = daw::to_uint64_buffer( str );
91 // 1-byte mask trick (works on 4 pairs of single digits)
92 auto const lower_digits =
93 ( chunk & 0x0F'00'0F'00'0F'00'0F'00_u64 ) >> 8U;
94 auto const upper_digits =
95 ( chunk & 0x00'0F'00'0F'00'0F'00'0F_u64 ) * 10U;
96 auto const chunk2 = lower_digits + upper_digits;
97
98 // 2-byte mask trick (works on 2 pairs of two digits)
99 auto const lower_digits2 =
100 ( chunk2 & 0x00'FF'00'00'00'FF'00'00_u64 ) >> 16U;
101 auto const upper_digits2 =
102 ( chunk2 & 0x00'00'00'FF'00'00'00'FF_u64 ) * 100U;
103 auto const chunk3 = lower_digits2 + upper_digits2;
104
105 // 4-byte mask trick (works on pair of four digits)
106 auto const lower_digits3 =
107 ( chunk3 & 0x00'00'FF'FF'00'00'00'00_u64 ) >> 32U;
108 auto const upper_digits3 =
109 ( chunk3 & 0x00'00'00'00'00'00'FF'FF_u64 ) * 10000U;
110 auto const chunk4 = lower_digits3 + upper_digits3;
111
112 return chunk4 & 0xFFFF'FFFF_u64;
113 }
114 static_assert( parse_8_digits( "12345678" ) == 1234'5678_u64,
115 "8 digit parser does not work on this platform" );
116 inline constexpr UInt64 parse_16_digits( const char *const str ) {
117 auto const upper = parse_8_digits( str );
118 auto const lower = parse_8_digits( str + 8 );
119 return upper * 100'000'000_u64 + lower;
120 }
121 static_assert( parse_16_digits( "1234567890123456" ) ==
122 1234567890123456_u64,
123 "16 digit parser does not work on this platform" );
124
125 template<typename Unsigned, JsonRangeCheck RangeChecked, bool KnownBounds,
126 typename ParseState,
127 std::enable_if_t<KnownBounds, std::nullptr_t> = nullptr>
128 [[nodiscard]] static constexpr Unsigned
130 using CharT = typename ParseState::CharT;
131 // We know how many digits are in the number
133 using uresult_t =
135 static_assert(
136 not static_cast<bool>( RangeChecked ) or
137 std::is_same<uresult_t, UInt64>::value,
138 "Range checking is only supported for std integral types" );
139
140 CharT *first = parse_state.first;
141 CharT *const last = parse_state.last;
142 uresult_t result = uresult_t( );
143
144 while( last - first >= 16 ) {
145 result *= static_cast<uresult_t>( 10'000'000'000'000'000ULL );
146 result += static_cast<uresult_t>( parse_16_digits( first ) );
147 first += 16;
148 }
149 if( last - first >= 8 ) {
150 result *= static_cast<uresult_t>( 100'000'000ULL );
151 result += static_cast<uresult_t>( parse_8_digits( first ) );
152 first += 8;
153 }
154 if constexpr( ParseState::is_zero_terminated_string ) {
155 auto dig = parse_digit( *first );
156 while( dig < 10U ) {
157 result *= 10U;
158 result += dig;
159 ++first;
160 dig = parse_digit( *first );
161 }
162 } else {
163 while( first < last ) {
164 result *= 10U;
165 result += parse_digit( *first );
166 ++first;
167 }
168 }
169 if constexpr( RangeChecked != JsonRangeCheck::Never ) {
170 auto const count = ( daw::numeric_limits<result_t>::digits10 + 1U ) -
171 std::size( parse_state );
173 ( ( result <= static_cast<uresult_t>(
174 ( daw::numeric_limits<result_t>::max )( ) ) ) &
175 ( count >= 0 ) ),
176 ErrorReason::NumberOutOfRange, parse_state );
177 }
178 parse_state.first = first;
179 if constexpr( RangeChecked == JsonRangeCheck::Never ) {
180 return daw::construct_a<Unsigned>( static_cast<Unsigned>( result ) );
181 } else {
182 return daw::construct_a<Unsigned>(
183 daw::narrow_cast<Unsigned>( result ) );
184 }
185 }
186
187 //**************************
188 template<typename Unsigned, JsonRangeCheck RangeChecked, bool KnownBounds,
189 typename ParseState,
190 std::enable_if_t<not KnownBounds, std::nullptr_t> = nullptr>
191 [[nodiscard]] static constexpr Unsigned
193 using CharT = typename ParseState::CharT;
194 // We do not know how long the string is
195 using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
196 using uresult_t =
197 max_unsigned_t<RangeChecked, daw::make_unsigned_t<Unsigned>, UInt64>;
198 static_assert(
199 not static_cast<bool>( RangeChecked ) or
200 std::is_same<uresult_t, UInt64>::value,
201 "Range checking is only supported for std integral types" );
203 ErrorReason::UnexpectedEndOfData, parse_state );
204 CharT *first = parse_state.first;
205 CharT *const orig_first = first;
206 (void)orig_first; // only used inside if constexpr and gcc9 warns
207 CharT *const last = parse_state.last;
208 uresult_t result = uresult_t( );
209 bool has_eight =
210 last - first >= 8 ? is_made_of_eight_digits_cx( first ) : false;
211 if( has_eight & ( last - first >= 16 ) ) {
212 bool has_sixteen = is_made_of_eight_digits_cx( first + 8 );
213 while( has_sixteen ) {
214 result *= static_cast<uresult_t>( 10'000'000'000'000'000ULL );
215 result += static_cast<uresult_t>( parse_16_digits( first ) );
216 first += 16;
217 has_eight =
218 last - first >= 8 ? is_made_of_eight_digits_cx( first ) : false;
219 has_sixteen =
220 has_eight and
221 ( last - first >= 16 ? is_made_of_eight_digits_cx( first + 8 )
222 : false );
223 }
224 }
225 if( has_eight ) {
226 result *= static_cast<uresult_t>( 100'000'000ULL );
227 result += static_cast<uresult_t>( parse_8_digits( first ) );
228 first += 8;
229 }
230 if constexpr( ParseState::is_zero_terminated_string ) {
231 auto dig = parse_digit( *first );
232 while( dig < 10U ) {
233 result *= 10U;
234 result += dig;
235 ++first;
236 dig = parse_digit( *first );
237 }
238 } else {
239 auto dig = parse_digit( *first );
240 while( first < last and dig < 10U ) {
241 result *= 10U;
242 result += dig;
243 ++first;
244 dig = parse_digit( *first );
245 }
246 }
247
248 if constexpr( RangeChecked != JsonRangeCheck::Never ) {
249 auto const count = static_cast<std::ptrdiff_t>(
250 daw::numeric_limits<result_t>::digits10 + 1 ) -
251 ( first - orig_first );
252 daw_json_assert( count >= 0, ErrorReason::NumberOutOfRange,
253 parse_state );
254 }
255
256 parse_state.first = first;
257 if constexpr( RangeChecked == JsonRangeCheck::Never ) {
258 return daw::construct_a<Unsigned>(
259 static_cast<Unsigned>( static_cast<result_t>( result ) ) );
260 } else {
261 return daw::construct_a<Unsigned>(
262 daw::narrow_cast<Unsigned>( result ) );
263 }
264 }
265
266#if false and defined( DAW_ALLOW_SSE42 )
267 /*
268 // Adapted from
269 //
270 //
271 https://github.com/lemire/simdjson/blob/102262c7abe64b517a36a6049b39d95f58bf4aea/src/haswell/numberparsing.h
272 static inline UInt64 parse_eight_digits_unrolled( const char *ptr ) {
273 // this actually computes *16* values so we are being wasteful.
274 static __m128i const ascii0 = _mm_set1_epi8( '0' );
275
276 static __m128i const mul_1_10 =
277 _mm_setr_epi8( 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1
278 );
279
280 static __m128i const mul_1_100 =
281 _mm_setr_epi16( 100, 1, 100, 1, 100, 1, 100, 1 );
282
283 static __m128i const mul_1_10000 =
284 _mm_setr_epi16( 10000, 1, 10000, 1, 10000, 1, 10000, 1 );
285
286 __m128i const input = _mm_sub_epi8(
287 _mm_loadu_si128( reinterpret_cast<__m128i const *>( ptr ) ), ascii0 );
288 __m128i const t1 = _mm_maddubs_epi16( input, mul_1_10 );
289 __m128i const t2 = _mm_madd_epi16( t1, mul_1_100 );
290 __m128i const t3 = _mm_packus_epi32( t2, t2 );
291 __m128i const t4 = _mm_madd_epi16( t3, mul_1_10000 );
292 return to_uint64( static_cast<std::uint32_t>( _mm_cvtsi128_si32(
293 t4 ) ) ); // only captures the sum of the first 8 digits, drop the
294 rest
295 }
296
297 static inline UInt64 parse_sixteen_digits_unrolled( const char *ptr ) {
298 static __m128i const ascii0 = _mm_set1_epi8( '0' );
299
300 static __m128i const mul_1_10 =
301 _mm_setr_epi8( 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1
302 );
303
304 static __m128i const mul_1_100 =
305 _mm_setr_epi16( 100, 1, 100, 1, 100, 1, 100, 1 );
306
307 static __m128i const mul_1_10000 =
308 _mm_setr_epi16( 10000, 1, 10000, 1, 10000, 1, 10000, 1 );
309
310 __m128i const input = _mm_sub_epi8(
311 _mm_loadu_si128( reinterpret_cast<__m128i const *>( ptr ) ), ascii0 );
312 __m128i const t1 = _mm_maddubs_epi16( input, mul_1_10 );
313 __m128i const t2 = _mm_madd_epi16( t1, mul_1_100 );
314 __m128i const t3 = _mm_packus_epi32( t2, t2 );
315 __m128i const t4 = _mm_madd_epi16( t3, mul_1_10000 );
316 return to_uint64( _mm_cvtsi128_si64( t4 ) );
317 }
318
319 [[nodiscard]] static inline bool
320 is_made_of_eight_digits_fast( const char *ptr ) {
321 UInt64 val;
322 memcpy( &val, ptr, sizeof( std::uint64_t ) );
323 return ( ( ( val & 0xF0F0F0F0F0F0F0F0_u64 ) |
324 ( ( ( val + 0x0606060606060606_u64 ) & 0xF0F0F0F0F0F0F0F0_u64
325 )
326 >> 4_u64 ) ) == 0x3333333333333333_u64 );
327 }
328
329 template<typename Unsigned, JsonRangeCheck RangeChecked, bool, typename
330 ParseState>
331 [[nodiscard]] static inline Unsigned
332 unsigned_parser( sse42_exec_tag , ParseState &parse_state ) {
333 daw_json_assert_weak( parse_state.has_more( ),
334 ErrorRange::UnexpectedEndOfData, parse_state
335 ); using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
336 result_t result = result_t( ); CharT *first = parse_state.first; CharT
337 *const last = parse_state.last; CharT *const orig_first =
338 first;
339 {
340 auto sz = last - first;
341 while( ( sz >= 8 ) & is_made_of_eight_digits_fast( first ) ) {
342 if( ( sz < 16 ) | ( not is_made_of_eight_digits_fast( first + 8 ) )
343 ) { result *= 100'000'000_u64; result += static_cast<result_t>(
344 parse_eight_digits_unrolled( first ) ); first += 8; break;
345 }
346 result *= 10'000'000'000'000'000_u64;
347 result +=
348 static_cast<result_t>( parse_sixteen_digits_unrolled( first ) );
349 sz -= 16;
350 first += 16;
351 }
352 }
353
354 auto dig = parse_digit( *first );
355 while( dig < 10U ) {
356 result *= 10U;
357 result += dig;
358 ++first;
359 dig = parse_digit( *first );
360 }
361 if constexpr( RangeChecked != JsonRangeCheck::Never ) {
362 auto const count =
363 static_cast<intmax_t>( daw::numeric_limits<Unsigned>::digits10 + 1 )
364 - ( first - orig_first ); daw_json_assert( (count >= 0) & (result <=
365 static_cast<result_t>( (daw::numeric_limits<Unsigned>::max)( ) )),
366 ErrorReason::NumberOutOfRange,
367 parse_state
368 );
369 }
370 parse_state.first = first;
371 if constexpr( RangeChecked == JsonRangeCheck::Never ) {
372 return daw::construct_a<Unsigned>( static_cast<Unsigned>( result ) );
373 } else {
374 return daw::construct_a<Unsigned>( daw::narrow_cast<Unsigned>( result
375 ) );
376 }
377 }
378 */
379#endif
380 } // namespace json_details
381 } // namespace DAW_JSON_VER
382} // namespace daw::json
#define daw_json_assert_weak(Bool,...)
Definition: daw_json_assert.h:190
#define daw_json_assert(Bool,...)
Definition: daw_json_assert.h:179
ParseState & parse_state
Definition: daw_json_parse_class.h:182
static constexpr Unsigned unsigned_parser(constexpr_exec_tag, ParseState &parse_state)
Definition: daw_json_parse_unsigned_int.h:129
constexpr UInt64 parse_8_digits(const char *const str)
Definition: daw_json_parse_unsigned_int.h:89
std::conditional_t< std::disjunction< daw::is_integral< Unsigned >, std::is_enum< Unsigned > >::value, std::conditional_t<(sizeof(Unsigned) > sizeof(MaxArithUnsigned)), Unsigned, MaxArithUnsigned >, Unsigned > max_unsigned_t
Definition: daw_json_parse_unsigned_int.h:85
constexpr UInt64 parse_16_digits(const char *const str)
Definition: daw_json_parse_unsigned_int.h:116
static constexpr bool is_made_of_eight_digits_cx(const char *ptr)
Definition: daw_json_parse_unsigned_int.h:58
constexpr Signed to_signed(Unsigned &&u, Signed sign)
Definition: daw_json_parse_unsigned_int.h:38
static constexpr DAW_ATTRIB_FLATINLINE unsigned parse_digit(char c)
Definition: daw_json_parse_digit.h:19
@ Signed
Number - Signed Integer.
@ Unsigned
Number - Unsigned Integer.
JsonRangeCheck
Definition: daw_json_type_options.h:52
Definition: daw_from_json.h:22
Definition: daw_json_exec_modes.h:19
#define DAW_JSON_VER
The version string used in namespace definitions. Must be a valid namespace name.
Definition: version.h:16