DAW JSON Link
daw_not_const_ex_functions.h
Go to the documentation of this file.
1 // Copyright (c) Darrell Wright
2 //
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5 //
6 // Official repository: https://github.com/beached/daw_json_link
7 //
8 
9 #pragma once
10 
11 #include "daw_json_exec_modes.h"
12 
13 #include <daw/daw_hide.h>
14 #include <daw/daw_uint_buffer.h>
15 
16 #if defined( DAW_ALLOW_SSE42 )
17 #include <emmintrin.h>
18 #include <nmmintrin.h>
19 #include <smmintrin.h>
20 #include <tmmintrin.h>
21 #include <wmmintrin.h>
22 #include <xmmintrin.h>
23 #ifdef _MSC_VER
24 #include <intrin.h>
25 #endif
26 #endif
27 
28 #include <ciso646>
29 #include <cstddef>
30 #include <cstring>
31 
32 namespace daw::json::json_details {
/// @brief Report whether the character at @p ptr directly follows a single
/// backslash that is not itself preceded by another backslash.
/// @param ptr     position of the character being examined
/// @param min_ptr lowest address that may be inspected
/// @return true only when at least two characters lie between @p min_ptr and
/// @p ptr, ptr[-1] is a backslash and ptr[-2] is not.  With fewer than two
/// characters available the answer is always false.
DAW_ATTRIBUTE_FLATTEN static inline constexpr bool
is_escaped( char const *ptr, char const *min_ptr ) {
	// Check the available range before touching ptr[-1]/ptr[-2]; the result
	// for a short range was already false, this only removes the read that
	// happened in front of min_ptr.
	if( ( ptr - min_ptr ) < 2 ) {
		return false;
	}
	if( *( ptr - 1 ) != '\\' ) {
		return false;
	}
	return *( ptr - 2 ) != '\\';
}
43 
/// 256 entry membership table indexed by character value.
struct key_table_t {
	alignas( 16 ) bool values[256] = { };

	/// @return the flag stored for @p idx.  The index is taken as
	/// unsigned char so that negative char values map to slots 128..255.
	constexpr bool operator[]( char idx ) const {
		return values[static_cast<unsigned char>( idx )];
	}
};

/// Compile time table whose entry is true for every character in @p keys and
/// false for everything else.
template<char... keys>
static constexpr inline key_table_t key_table = [] {
	auto result = key_table_t{ };
	// A comma fold is well formed for an empty pack too, unlike a fold
	// over `|`, so key_table<> yields an all-false table.
	( (void)( result.values[static_cast<unsigned char>( keys )] = true ),
	  ... );
	return result;
}( );
59 
60  static inline std::ptrdiff_t find_lsb_set( runtime_exec_tag const &,
61  UInt32 value ) {
62 #if defined( __GNUC__ ) or defined( __clang__ )
63  return __builtin_ffs( static_cast<int>( value ) ) - 1;
64 #else
65  unsigned long index;
66  _BitScanForward( &index, static_cast<int>( value ) );
67  return static_cast<std::ptrdiff_t>( index );
68 #endif
69  }
70 
71 #if defined( DAW_ALLOW_SSE42 )
/// @brief Build a 128 bit vector whose byte lanes, from lane 0 upwards, hold
/// c0, c1, ... c15.  Arguments that are not supplied default to 0.
DAW_ATTRIBUTE_FLATTEN static inline __m128i
set_reverse( char c0, char c1 = 0, char c2 = 0, char c3 = 0, char c4 = 0,
             char c5 = 0, char c6 = 0, char c7 = 0, char c8 = 0, char c9 = 0,
             char c10 = 0, char c11 = 0, char c12 = 0, char c13 = 0,
             char c14 = 0, char c15 = 0 ) {
	// _mm_setr_epi8 takes its arguments lowest lane first, which is the
	// ordering this helper exposes.
	return _mm_setr_epi8( c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11,
	                      c12, c13, c14, c15 );
}
80 
81  DAW_ATTRIBUTE_FLATTEN static inline __m128i
82  uload16_char_data( sse42_exec_tag const &, char const *ptr ) {
83  return _mm_loadu_si128( reinterpret_cast<__m128i const *>( ptr ) );
84  }
85 
86  template<char k>
87  DAW_ATTRIBUTE_FLATTEN static inline UInt32
88  mem_find_eq( sse42_exec_tag const &, __m128i block ) {
89  static __m128i const keys = _mm_set1_epi8( k );
90  __m128i const found = _mm_cmpeq_epi8( block, keys );
91  return to_uint32( _mm_movemask_epi8( found ) );
92  }
93 
94  template<unsigned char k>
95  DAW_ATTRIBUTE_FLATTEN static inline UInt32
96  mem_find_gt( sse42_exec_tag const &, __m128i block ) {
97  static __m128i const keys = _mm_set1_epi8( k );
98  __m128i const found = _mm_cmpgt_epi8( block, keys );
99  return to_uint32( _mm_movemask_epi8( found ) );
100  }
101 
102  template<bool is_unchecked_input, char... keys>
103  DAW_ATTRIBUTE_FLATTEN static inline char const *
104  mem_move_to_next_of( sse42_exec_tag const &tag, char const *first,
105  char const *const last ) {
106 
107  while( last - first >= 16 ) {
108  auto const val0 = uload16_char_data( tag, first );
109  auto const key_positions = ( mem_find_eq<keys>( tag, val0 ) | ... );
110  if( key_positions != 0 ) {
111  return first + find_lsb_set( tag, key_positions );
112  }
113  first += 16;
114  }
115  if( last - first >= 8 ) {
116  char const buff[16]{ first[0], first[1], first[2], first[3],
117  first[4], first[5], first[6], first[7] };
118  auto const val0 = uload16_char_data( tag, buff );
119  auto const key_positions =
120  ( mem_find_eq<keys>( tag, val0 ) | ... ) & mask_from_lsb32<8>;
121  if( key_positions != 0 ) {
122  return first + find_lsb_set( tag, key_positions );
123  }
124  first += 8;
125  }
126  if( last - first >= 4 ) {
127  char const buff[16]{ first[0], first[1], first[2], first[3] };
128  auto const val0 = uload16_char_data( tag, buff );
129  auto const key_positions =
130  ( mem_find_eq<keys>( tag, val0 ) | ... ) & mask_from_lsb32<4>;
131  if( key_positions != 0 ) {
132  return first + find_lsb_set( tag, key_positions );
133  }
134  first += 4;
135  }
136  switch( last - first ) {
137  case 0:
138  return first;
139  case 1:
140  return first +
141  static_cast<std::ptrdiff_t>( not key_table<keys...>[*first] );
142  case 2: {
143  char const buff[16]{ first[0], first[1] };
144  auto const val0 = uload16_char_data( tag, buff );
145  auto const key_positions =
146  ( mem_find_eq<keys>( tag, val0 ) | ... ) & mask_from_lsb32<2>;
147  if( key_positions != 0 ) {
148  return first + find_lsb_set( tag, key_positions );
149  }
150  return last;
151  }
152  case 3: {
153  char const buff[16]{ first[0], first[1], first[2] };
154  auto const val0 = uload16_char_data( tag, buff );
155  auto const key_positions =
156  ( mem_find_eq<keys>( tag, val0 ) | ... ) & mask_from_lsb32<3>;
157  if( key_positions != 0 ) {
158  return first + find_lsb_set( tag, key_positions );
159  }
160  return last;
161  }
162  }
163  DAW_UNREACHABLE( );
164  }
165 
166  template<bool is_unchecked_input, char... keys>
167  DAW_ATTRIBUTE_FLATTEN static inline char const *
168  mem_move_to_next_not_of( sse42_exec_tag const &tag, char const *first,
169  char const *last ) {
170  static constexpr int keys_len = static_cast<int>( sizeof...( keys ) );
171  static_assert( keys_len <= 16 );
172  __m128i const a = set_reverse( keys... );
173  static constexpr int compare_mode =
174  _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_NEGATIVE_POLARITY;
175 
176  while( last - first >= 16 ) {
177  auto const b = uload16_char_data( tag, first );
178  int const b_len = 16;
179  int result = _mm_cmpestri( a, keys_len, b, b_len, compare_mode );
180  first += result;
181  if( result < 16 ) {
182  return first;
183  }
184  }
185  static constexpr auto is_eq = []( char c ) {
186  return ( ( c == keys ) | ... );
187  };
188  if constexpr( is_unchecked_input ) {
189  while( is_eq( *first ) ) {
190  ++first;
191  }
192  } else {
193  while( first < last and is_eq( *first ) ) {
194  ++first;
195  }
196  }
197  return first;
198  }
199 
/// @brief 32 bit unsigned addition with carry detection.
/// @tparam U32 a 4 byte type convertible to and from std::uint32_t with
/// static_cast
/// @param value1 first addend
/// @param value2 second addend
/// @param result receives the sum reduced modulo 2^32
/// @return true when the addition wrapped around
template<typename U32>
DAW_ATTRIBUTE_FLATTEN static inline bool add_overflow( U32 value1, U32 value2,
                                                       U32 &result ) {
	static_assert( sizeof( U32 ) <= sizeof( unsigned long long ) );
	static_assert( sizeof( U32 ) == 4 );
	auto const lhs = static_cast<std::uint32_t>( value1 );
	auto const rhs = static_cast<std::uint32_t>( value2 );
	// The sum is produced in a correctly typed temporary and converted back,
	// rather than writing through a reinterpret_cast of &result.
	std::uint32_t sum = 0;
#if defined( __clang__ ) or defined( __GNUC__ )
	bool const carried = __builtin_add_overflow( lhs, rhs, &sum );
#else
	bool const carried = _addcarry_u32( 0, lhs, rhs, &sum ) != 0;
#endif
	result = static_cast<U32>( sum );
	return carried;
}
227 
228  // Adapted from
229  // https://github.com/simdjson/simdjson/blob/master/src/generic/stage1/json_string_scanner.h#L79
230  DAW_ATTRIBUTE_FLATTEN static inline constexpr UInt32
231  find_escaped_branchless( constexpr_exec_tag const &, UInt32 &prev_escaped,
232  UInt32 backslashes ) {
233  backslashes &= ~prev_escaped;
234  UInt32 follow_escape = ( backslashes << 1 ) | prev_escaped;
235  constexpr UInt32 even_bits = 0x5555'5555_u32;
236 
237  UInt32 const odd_seq_start =
238  backslashes & ( ~even_bits ) & ( ~follow_escape );
239  UInt32 seq_start_on_even_bits = 0_u32;
240  prev_escaped = [&] {
241  auto r = odd_seq_start + backslashes;
242  seq_start_on_even_bits = 0x0000'FFFF_u32 & r;
243  r >>= 16U;
244  return r;
245  }( );
246  UInt32 invert_mask = seq_start_on_even_bits << 1U;
247 
248  return ( even_bits ^ invert_mask ) & follow_escape;
249  }
250 
251  DAW_ATTRIBUTE_FLATTEN static inline UInt32 prefix_xor( sse42_exec_tag const &,
252  UInt32 bitmask ) {
253  __m128i const all_ones = _mm_set1_epi8( '\xFF' );
254  __m128i const result = _mm_clmulepi64_si128(
255  _mm_set_epi32( 0, 0, 0, static_cast<std::int32_t>( bitmask ) ), all_ones,
256  0 );
257  return to_uint32( _mm_cvtsi128_si32( result ) );
258  }
259 
260  template<bool is_unchecked_input>
261  static inline char const *
262  mem_skip_until_end_of_string( simd_exec_tag const &tag, char const *first,
263  char const *const last ) {
264  UInt32 prev_escapes = 0_u32;
265  while( last - first >= 16 ) {
266  auto const val0 = uload16_char_data( tag, first );
267  UInt32 const backslashes = mem_find_eq<'\\'>( tag, val0 );
268  UInt32 const escaped =
269  find_escaped_branchless( tag, prev_escapes, backslashes );
270  UInt32 const quotes = mem_find_eq<'"'>( tag, val0 ) & ( ~escaped );
271  UInt32 const in_string = prefix_xor( tag, quotes );
272  if( in_string != 0 ) {
273  first += find_lsb_set( tag, in_string );
274  return first;
275  }
276  first += 16;
277  }
278  if constexpr( is_unchecked_input ) {
279  while( *first != '"' ) {
280  while( not key_table<'"', '\\'>[*first] ) {
281  ++first;
282  }
283  if( *first == '"' ) {
284  return first;
285  }
286  first += 2;
287  }
288  } else {
289  while( DAW_JSON_LIKELY( first < last ) and *first != '"' ) {
290  while( DAW_JSON_LIKELY( first < last ) and
291  not key_table<'"', '\\'>[*first] ) {
292  ++first;
293  }
294  if( first >= last ) {
295  return last;
296  }
297  if( *first == '"' ) {
298  return first;
299  }
300  first += 2;
301  }
302  }
303  return ( is_unchecked_input or DAW_JSON_LIKELY( first < last ) ) ? first
304  : last;
305  }
306 
307  template<bool is_unchecked_input>
308  static inline char const *
309  mem_skip_until_end_of_string( simd_exec_tag const &tag, char const *first,
310  char const *const last,
311  std::ptrdiff_t &first_escape ) {
312  char const *const first_first = first;
313  UInt32 prev_escapes = 0_u32;
314  while( last - first >= 16 ) {
315  auto const val0 = uload16_char_data( tag, first );
316  UInt32 const backslashes = mem_find_eq<'\\'>( tag, val0 );
317  if( ( backslashes != 0 ) & ( first_escape < 0 ) ) {
318  first_escape = find_lsb_set( tag, backslashes );
319  }
320  UInt32 const escaped =
321  find_escaped_branchless( tag, prev_escapes, backslashes );
322  UInt32 const quotes = mem_find_eq<'"'>( tag, val0 ) & ( ~escaped );
323  UInt32 const in_string = prefix_xor( tag, quotes );
324  if( in_string != 0 ) {
325  first += find_lsb_set( tag, in_string );
326  return first;
327  }
328  first += 16;
329  }
330  if constexpr( is_unchecked_input ) {
331  while( *first != '"' ) {
332  while( not key_table<'"', '\\'>[*first] ) {
333  ++first;
334  }
335  if( *first == '"' ) {
336  return first;
337  }
338  if( first_escape < 0 ) {
339  first_escape = first_first - first;
340  }
341  first += 2;
342  }
343  } else {
344  while( DAW_JSON_LIKELY( first < last ) and *first != '"' ) {
345  while( DAW_JSON_LIKELY( first < last ) and
346  not key_table<'"', '\\'>[*first] ) {
347  ++first;
348  }
349  if( first >= last ) {
350  return last;
351  }
352  if( *first == '"' ) {
353  return first;
354  }
355  if( first_escape < 0 ) {
356  first_escape = first_first - first;
357  }
358  first += 2;
359  }
360  }
361  return ( is_unchecked_input or DAW_JSON_LIKELY( first < last ) ) ? first
362  : last;
363  }
364 
365 #endif
366  template<bool is_unchecked_input, char... keys>
367  DAW_ATTRIBUTE_FLATTEN static inline char const *
368  mem_move_to_next_of( runtime_exec_tag const &, char const *first,
369  char const *last ) {
370 
371  if( sizeof...( keys ) == 1 ) {
372  char const key[]{ keys... };
373  char const *ptr = reinterpret_cast<char const *>( std::memchr(
374  first, key[0], static_cast<std::size_t>( last - first ) ) );
375  if( ptr == nullptr ) {
376  ptr = last;
377  }
378  return ptr;
379  } else {
380  constexpr auto eq = []( char l, char r ) { return l == r; };
381  while( is_unchecked_input or first < last ) {
382  char const c = *first;
383  if( ( eq( c, keys ) | ... ) ) {
384  return first;
385  }
386  ++first;
387  }
388  return first;
389  }
390  }
391 
392  template<bool is_unchecked_input, typename ExecTag,
393  std::enable_if_t<std::is_base_of_v<runtime_exec_tag, ExecTag>,
394  std::nullptr_t> = nullptr>
395  DAW_ATTRIBUTE_FLATTEN static inline char const *
396  mem_skip_string( ExecTag const &tag, char const *first,
397  char const *const last ) {
398  return mem_move_to_next_of<is_unchecked_input, '"', '\\'>( tag, first,
399  last );
400  }
401 
402  template<bool is_unchecked_input, typename ExecTag,
403  std::enable_if_t<std::is_base_of_v<runtime_exec_tag, ExecTag>,
404  std::nullptr_t> = nullptr>
405  DAW_ATTRIBUTE_FLATTEN static inline char const *
406  mem_skip_until_end_of_string( ExecTag const &tag, char const *first,
407  char const *const last ) {
408  if constexpr( not is_unchecked_input ) {
409  daw_json_assert( first < last, ErrorReason::UnexpectedEndOfData );
410  }
411  first =
412  mem_move_to_next_of<is_unchecked_input, '\\', '"'>( tag, first, last );
413  while( is_unchecked_input or first < last ) {
414  switch( *first ) {
415  case '"':
416  return first;
417  case '\\':
418  if constexpr( is_unchecked_input ) {
419  ++first;
420  } else {
421  first += static_cast<int>( static_cast<bool>( last - first ) );
422  }
423  break;
424  }
425  ++first;
426  first =
427  mem_move_to_next_of<is_unchecked_input, '\\', '"'>( tag, first, last );
428  }
429  return first;
430  }
431 
432  template<bool is_unchecked_input>
433  DAW_ATTRIBUTE_FLATTEN static inline char const *
434  mem_skip_until_end_of_string( runtime_exec_tag const &tag, char const *first,
435  char const *const last,
436  std::ptrdiff_t &first_escape ) {
437  char const *const first_first = first;
438  if constexpr( not is_unchecked_input ) {
439  daw_json_assert( first < last, ErrorReason::UnexpectedEndOfData );
440  }
441  first =
442  mem_move_to_next_of<is_unchecked_input, '\\', '"'>( tag, first, last );
443  while( is_unchecked_input or first < last ) {
444  switch( *first ) {
445  case '"':
446  return first;
447  case '\\':
448  if( first_escape < 0 ) {
449  first_escape = first_first - first;
450  }
451  if constexpr( is_unchecked_input ) {
452  ++first;
453  } else {
454  first += static_cast<int>( static_cast<bool>( last - first ) );
455  }
456  break;
457  }
458  ++first;
459  first =
460  mem_move_to_next_of<is_unchecked_input, '\\', '"'>( tag, first, last );
461  }
462  return first;
463  }
464 } // namespace daw::json::json_details
daw_json_assert
#define daw_json_assert(Bool,...)
Definition: daw_json_assert.h:196
daw_json_exec_modes.h
daw::json::simd_exec_tag
runtime_exec_tag simd_exec_tag
Definition: daw_json_exec_modes.h:37
DAW_JSON_LIKELY
#define DAW_JSON_LIKELY(Bool)
Definition: daw_json_assert.h:34