DAW JSON Link
daw_not_const_ex_functions.h
Go to the documentation of this file.
1// Copyright (c) Darrell Wright
2//
3// Distributed under the Boost Software License, Version 1.0. (See accompanying
4// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5//
6// Official repository: https://github.com/beached/daw_json_link
7//
8
9#pragma once
10
11#include "daw_json_exec_modes.h"
12#include "version.h"
13
14#include <daw/daw_attributes.h>
15#include <daw/daw_cpp_feature_check.h>
16#include <daw/daw_cxmath.h>
17#include <daw/daw_do_n.h>
18#include <daw/daw_likely.h>
19#include <daw/daw_uint_buffer.h>
20#include <daw/daw_unreachable.h>
21
22#if defined( DAW_ALLOW_SSE42 )
23#include <emmintrin.h>
24#include <nmmintrin.h>
25#include <smmintrin.h>
26#include <tmmintrin.h>
27#include <wmmintrin.h>
28#include <xmmintrin.h>
29#ifdef _MSC_VER
30#include <intrin.h>
31#endif
32#endif
33
34#include <ciso646>
35#include <cstddef>
36#include <cstring>
37
38namespace daw::json {
39 inline namespace DAW_JSON_VER {
40 namespace json_details {
41 DAW_ATTRIB_INLINE inline constexpr bool
42 is_escaped( char const *ptr, char const *min_ptr ) {
43 if( *( ptr - 1 ) != '\\' ) {
44 return false;
45 }
46 if( ( ptr - min_ptr ) < 2 ) {
47 return false;
48 }
49 return *( ptr - 2 ) != '\\';
50 }
51
52#if defined( DAW_ALLOW_SSE42 )
/// 256-entry membership table indexed by character value.
struct key_table_t {
  /// One flag per possible byte value; true means "this byte is a key".
  alignas( 16 ) bool values[256] = { };

  /// Looks up `idx`. The character is converted to `unsigned char` first so
  /// that negative `char` values index the upper half of the table.
  constexpr bool operator[]( char idx ) const {
    auto const slot = static_cast<unsigned char>( idx );
    return values[slot];
  }
};
60
61 template<char... keys>
62 static constexpr inline key_table_t key_table = [] {
63 auto result = key_table_t{ };
64 (void)( ( result.values[static_cast<unsigned char>( keys )] = true ) |
65 ... );
66 return result;
67 }( );
68#endif
69
70 inline std::ptrdiff_t find_lsb_set( runtime_exec_tag, UInt32 value ) {
71#if DAW_HAS_BUILTIN( __builtin_ffs )
72 return __builtin_ffs( static_cast<int>( value ) ) - 1;
73#elif defined( _MSC_VER )
74 unsigned long index;
75 _BitScanForward( &index, static_cast<int>( value ) );
76 return static_cast<std::ptrdiff_t>( index );
77#else
78 std::ptrdiff_t result = 0;
79 if( value == 0 ) {
80 return -1;
81 }
82 while( ( value & 1 ) == 0 ) {
83 value >>= 1;
84 ++result;
85 }
86 return result;
87#endif
88 }
89
90#if defined( DAW_ALLOW_SSE42 )
91 DAW_ATTRIB_INLINE inline __m128i
92 set_reverse( char c0, char c1 = 0, char c2 = 0, char c3 = 0, char c4 = 0,
93 char c5 = 0, char c6 = 0, char c7 = 0, char c8 = 0,
94 char c9 = 0, char c10 = 0, char c11 = 0, char c12 = 0,
95 char c13 = 0, char c14 = 0, char c15 = 0 ) {
96 return _mm_set_epi8( c15, c14, c13, c12, c11, c10, c9, c8, c7, c6, c5,
97 c4, c3, c2, c1, c0 );
98 }
99
100 DAW_ATTRIB_INLINE inline __m128i uload16_char_data( sse42_exec_tag,
101 char const *ptr ) {
102 return _mm_loadu_si128( reinterpret_cast<__m128i const *>( ptr ) );
103 }
104
105 DAW_ATTRIB_INLINE inline __m128i load16_char_data( sse42_exec_tag,
106 char const *ptr ) {
107 return _mm_load_si128( reinterpret_cast<__m128i const *>( ptr ) );
108 }
109
110 template<char k>
111 DAW_ATTRIB_INLINE inline UInt32 mem_find_eq( sse42_exec_tag,
112 __m128i block ) {
113 __m128i const keys = _mm_set1_epi8( k );
114 __m128i const found = _mm_cmpeq_epi8( block, keys );
115 return to_uint32( _mm_movemask_epi8( found ) );
116 }
117
118 template<unsigned char k>
119 DAW_ATTRIB_INLINE inline UInt32 mem_find_gt( sse42_exec_tag,
120 __m128i block ) {
121 static __m128i const keys = _mm_set1_epi8( k );
122 __m128i const found = _mm_cmpgt_epi8( block, keys );
123 return to_uint32( _mm_movemask_epi8( found ) );
124 }
125
126 template<bool is_unchecked_input, char... keys, typename CharT>
127 DAW_ATTRIB_INLINE inline CharT *mem_move_to_next_of( sse42_exec_tag tag,
128 CharT *first,
129 CharT *const last ) {
130
131 while( last - first >= 16 ) {
132 auto const val0 = uload16_char_data( tag, first );
133 auto const key_positions = ( mem_find_eq<keys>( tag, val0 ) | ... );
134 if( key_positions != 0 ) {
135 return first + find_lsb_set( tag, key_positions );
136 }
137 first += 16;
138 }
139 __m128i val1{ };
140 auto const max_pos = last - first;
141 memcpy( &val1, first, static_cast<std::size_t>( max_pos ) );
142 auto const key_positions = ( mem_find_eq<keys>( tag, val1 ) | ... );
143 if( key_positions != 0 ) {
144 auto const offset = find_lsb_set( tag, key_positions );
145 if( offset >= max_pos ) {
146 return last;
147 }
148 return first + offset;
149 }
150 return last;
151 }
152
153 template<bool is_unchecked_input, char... keys, typename CharT>
154 DAW_ATTRIB_INLINE inline CharT *
155 mem_move_to_next_not_of( sse42_exec_tag tag, CharT *first, CharT *last ) {
156 using keys_len = daw::constant<static_cast<int>( sizeof...( keys ) )>;
157 using compare_mode = daw::constant<static_cast<int>(
158 _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_NEGATIVE_POLARITY )>;
159 static_assert( keys_len::value <= 16 );
160
161 __m128i const a = set_reverse( keys... );
162
163 while( last - first >= 16 ) {
164 auto const b = uload16_char_data( tag, first );
165 int const result =
166 _mm_cmpestri( a, keys_len::value, b, 16, compare_mode::value );
167 first += result;
168 if( result < 16 ) {
169 return first;
170 }
171 }
172 __m128i b{ };
173 auto const max_pos = last - first;
174 int const result =
175 _mm_cmpestri( a, keys_len::vlaue, b, 16, compare_mode::value );
176 if( result < max_pos ) {
177 return first + result;
178 }
179 return last;
180 }
181
182 template<typename U32>
183 DAW_ATTRIB_INLINE inline bool add_overflow( U32 value1, U32 value2,
184 U32 &result ) {
185 static_assert( sizeof( U32 ) <= sizeof( unsigned long long ) );
186 static_assert( sizeof( U32 ) == 4 );
187#if( defined( __GNUC__ ) and __GNUC__ >= 8 ) or defined( __clang__ ) or \
188 ( DAW_HAS_BUILTIN( __builtin_uadd_overflow ) and \
189 DAW_HAS_BUILTIN( __builtin_uaddl_overflow ) and \
190 DAW_HAS_BUILTIN( __builtin_uaddll_overflow ) )
191 if constexpr( sizeof( unsigned ) == sizeof( U32 ) ) {
192 return __builtin_uadd_overflow(
193 static_cast<unsigned>( value1 ), static_cast<unsigned>( value2 ),
194 reinterpret_cast<unsigned *>( &result ) );
195 } else if constexpr( sizeof( unsigned long ) == sizeof( U32 ) ) {
196 return __builtin_uaddl_overflow(
197 static_cast<unsigned long>( value1 ),
198 static_cast<unsigned long>( value2 ),
199 reinterpret_cast<unsigned long *>( &result ) );
200 } else {
201 return __builtin_uaddll_overflow(
202 static_cast<unsigned long long>( value1 ),
203 static_cast<unsigned long long>( value2 ),
204 reinterpret_cast<unsigned long long *>( &result ) );
205 }
206#else
207 return _addcarry_u32( 0, static_cast<std::uint32_t>( value1 ),
208 static_cast<std::uint32_t>( value2 ),
209 reinterpret_cast<std::uint32_t *>( &result ) );
210#endif
211 }
212
213 // Adapted from
214 // https://github.com/simdjson/simdjson/blob/master/src/generic/stage1/json_string_scanner.h#L79
215 DAW_ATTRIB_INLINE inline constexpr UInt32
216 find_escaped_branchless( constexpr_exec_tag, UInt32 &prev_escaped,
217 UInt32 backslashes ) {
218 backslashes &= ~prev_escaped;
219 UInt32 follow_escape = ( backslashes << 1 ) | prev_escaped;
220 using even_bits = daw::constant<0x5555'5555_u32>;
221
222 UInt32 const odd_seq_start =
223 backslashes & ( ~even_bits::value ) & ( ~follow_escape );
224 UInt32 seq_start_on_even_bits = 0_u32;
225 prev_escaped = [&] {
226 auto r = odd_seq_start + backslashes;
227 seq_start_on_even_bits = 0x0000'FFFF_u32 & r;
228 r >>= 16U;
229 return r;
230 }( );
231 UInt32 invert_mask = seq_start_on_even_bits << 1U;
232
233 return ( even_bits::value ^ invert_mask ) & follow_escape;
234 }
235
236 DAW_ATTRIB_INLINE inline UInt32 prefix_xor( sse42_exec_tag,
237 UInt32 bitmask ) {
238 __m128i const all_ones = _mm_set1_epi8( '\xFF' );
239 __m128i const result = _mm_clmulepi64_si128(
240 _mm_set_epi32( 0, 0, 0, static_cast<std::int32_t>( bitmask ) ),
241 all_ones, 0 );
242 return to_uint32( _mm_cvtsi128_si32( result ) );
243 }
244
245 template<bool is_unchecked_input, typename CharT>
246 inline CharT *mem_skip_until_end_of_string( simd_exec_tag tag,
247 CharT *first,
248 CharT *const last ) {
249 UInt32 prev_escapes = 0_u32;
250 while( last - first >= 16 ) {
251 auto const val0 = uload16_char_data( tag, first );
252 UInt32 const backslashes = mem_find_eq<'\\'>( tag, val0 );
253 UInt32 const escaped =
254 find_escaped_branchless( tag, prev_escapes, backslashes );
255 UInt32 const quotes = mem_find_eq<'"'>( tag, val0 ) & ( ~escaped );
256 UInt32 const in_string = prefix_xor( tag, quotes );
257 if( in_string != 0 ) {
258 first += find_lsb_set( tag, in_string );
259 return first;
260 }
261 first += 16;
262 }
263 if constexpr( is_unchecked_input ) {
264 while( *first != '"' ) {
265 while( not key_table<'"', '\\'>[*first] ) {
266 ++first;
267 }
268 if( *first == '"' ) {
269 return first;
270 }
271 first += 2;
272 }
273 } else {
274 while( DAW_LIKELY( first < last ) and *first != '"' ) {
275 while( DAW_LIKELY( first < last ) and
276 not key_table<'"', '\\'>[*first] ) {
277 ++first;
278 }
279 if( first >= last ) {
280 return last;
281 }
282 if( *first == '"' ) {
283 return first;
284 }
285 first += 2;
286 }
287 }
288 return ( is_unchecked_input or DAW_LIKELY( first < last ) ) ? first
289 : last;
290 }
291
292 template<bool is_unchecked_input, typename CharT>
293 inline CharT *
294 mem_skip_until_end_of_string( simd_exec_tag tag, CharT *first,
295 CharT *const last,
296 std::ptrdiff_t &first_escape ) {
297 CharT *const first_first = first;
298 UInt32 prev_escapes = 0_u32;
299 while( last - first >= 16 ) {
300 auto const val0 = uload16_char_data( tag, first );
301 UInt32 const backslashes = mem_find_eq<'\\'>( tag, val0 );
302 if( ( backslashes != 0 ) & ( first_escape < 0 ) ) {
303 first_escape = find_lsb_set( tag, backslashes );
304 }
305 UInt32 const escaped =
306 find_escaped_branchless( tag, prev_escapes, backslashes );
307 UInt32 const quotes = mem_find_eq<'"'>( tag, val0 ) & ( ~escaped );
308 UInt32 const in_string = prefix_xor( tag, quotes );
309 if( in_string != 0 ) {
310 first += find_lsb_set( tag, in_string );
311 return first;
312 }
313 first += 16;
314 }
315 if constexpr( is_unchecked_input ) {
316 while( *first != '"' ) {
317 while( not key_table<'"', '\\'>[*first] ) {
318 ++first;
319 }
320 if( *first == '"' ) {
321 return first;
322 }
323 if( first_escape < 0 ) {
324 first_escape = first_first - first;
325 }
326 first += 2;
327 }
328 } else {
329 while( DAW_LIKELY( first < last ) and *first != '"' ) {
330 while( DAW_LIKELY( first < last ) and
331 not key_table<'"', '\\'>[*first] ) {
332 ++first;
333 }
334 if( first >= last ) {
335 return last;
336 }
337 if( *first == '"' ) {
338 return first;
339 }
340 if( first_escape < 0 ) {
341 first_escape = first_first - first;
342 }
343 first += 2;
344 }
345 }
346 return ( is_unchecked_input or DAW_LIKELY( first < last ) ) ? first
347 : last;
348 }
349
350#endif
351 template<bool is_unchecked_input, char... keys, typename CharT>
352 DAW_ATTRIB_INLINE inline CharT *
353 mem_move_to_next_of( runtime_exec_tag, CharT *first, CharT *last ) {
354 if constexpr( sizeof...( keys ) == 1 ) {
355 char const key[]{ keys... };
356 auto *ptr = reinterpret_cast<CharT *>( std::memchr(
357 first, key[0], static_cast<std::size_t>( last - first ) ) );
358 if( ptr == nullptr ) {
359 ptr = last;
360 }
361 return ptr;
362 } else {
363 constexpr auto eq = []( char l, char r ) { return l == r; };
364 while( is_unchecked_input or first < last ) {
365 char const c = *first;
366 if( ( eq( c, keys ) | ... ) ) {
367 return first;
368 }
369 ++first;
370 }
371 return first;
372 }
373 }
374
375 template<
376 bool is_unchecked_input, typename ExecTag, typename CharT,
377 std::enable_if_t<std::is_base_of<runtime_exec_tag, ExecTag>::value,
378 std::nullptr_t> = nullptr>
379 DAW_ATTRIB_INLINE inline CharT *
380 mem_skip_string( ExecTag const &tag, CharT *first, CharT *const last ) {
381 return mem_move_to_next_of<is_unchecked_input, '"', '\\'>( tag, first,
382 last );
383 }
384
385 template<
386 bool is_unchecked_input, typename ExecTag, typename CharT,
387 std::enable_if_t<std::is_base_of<runtime_exec_tag, ExecTag>::value,
388 std::nullptr_t> = nullptr>
389 DAW_ATTRIB_INLINE inline CharT *
390 mem_skip_until_end_of_string( ExecTag const &tag, CharT *first,
391 CharT *const last ) {
392 if constexpr( not is_unchecked_input ) {
393 daw_json_assert( first < last, ErrorReason::UnexpectedEndOfData );
394 }
395 first = mem_move_to_next_of<is_unchecked_input, '\\', '"'>( tag, first,
396 last );
397 while( is_unchecked_input or first < last ) {
398 switch( *first ) {
399 case '"':
400 return first;
401 case '\\':
402 if constexpr( is_unchecked_input ) {
403 ++first;
404 } else {
405 first += static_cast<int>( static_cast<bool>( last - first ) );
406 }
407 break;
408 }
409 ++first;
410 first = mem_move_to_next_of<is_unchecked_input, '\\', '"'>(
411 tag, first, last );
412 }
413 return first;
414 }
415
416 template<bool is_unchecked_input, typename CharT>
417 DAW_ATTRIB_INLINE inline CharT *
419 CharT *const last,
420 std::ptrdiff_t &first_escape ) {
421 CharT *const first_first = first;
422 if constexpr( not is_unchecked_input ) {
423 daw_json_assert( first < last, ErrorReason::UnexpectedEndOfData );
424 }
425 first = mem_move_to_next_of<is_unchecked_input, '\\', '"'>( tag, first,
426 last );
427 while( is_unchecked_input or first < last ) {
428 switch( *first ) {
429 case '"':
430 return first;
431 case '\\':
432 if( first_escape < 0 ) {
433 first_escape = first_first - first;
434 }
435 if constexpr( is_unchecked_input ) {
436 ++first;
437 } else {
438 first += static_cast<int>( static_cast<bool>( last - first ) );
439 }
440 break;
441 }
442 ++first;
443 first = mem_move_to_next_of<is_unchecked_input, '\\', '"'>(
444 tag, first, last );
445 }
446 return first;
447 }
448 } // namespace json_details
449 } // namespace DAW_JSON_VER
450} // namespace daw::json
#define daw_json_assert(Bool,...)
Definition: daw_json_assert.h:179
constexpr DAW_ATTRIB_INLINE bool is_escaped(char const *ptr, char const *min_ptr)
Definition: daw_not_const_ex_functions.h:42
std::ptrdiff_t find_lsb_set(runtime_exec_tag, UInt32 value)
Definition: daw_not_const_ex_functions.h:70
DAW_ATTRIB_INLINE CharT * mem_skip_until_end_of_string(ExecTag const &tag, CharT *first, CharT *const last)
Definition: daw_not_const_ex_functions.h:390
DAW_ATTRIB_INLINE CharT * mem_skip_string(ExecTag const &tag, CharT *first, CharT *const last)
Definition: daw_not_const_ex_functions.h:380
DAW_ATTRIB_INLINE CharT * mem_move_to_next_of(runtime_exec_tag, CharT *first, CharT *last)
Definition: daw_not_const_ex_functions.h:353
Definition: daw_from_json.h:22
Definition: daw_json_exec_modes.h:28
#define DAW_JSON_VER
The version string used in namespace definitions. Must be a valid namespace name.
Definition: version.h:16