DAW JSON Link
daw_not_const_ex_functions.h
Go to the documentation of this file.
1 // Copyright (c) Darrell Wright
2 //
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5 //
6 // Official repository: https://github.com/beached/daw_json_link
7 //
8 
9 #pragma once
10 
11 #include "daw_json_exec_modes.h"
12 #include "version.h"
13 
14 #include <daw/daw_attributes.h>
15 #include <daw/daw_cpp_feature_check.h>
16 #include <daw/daw_do_n.h>
17 #include <daw/daw_likely.h>
18 #include <daw/daw_uint_buffer.h>
19 #include <daw/daw_unreachable.h>
20 
21 #if defined( DAW_ALLOW_SSE42 )
22 #include <emmintrin.h>
23 #include <nmmintrin.h>
24 #include <smmintrin.h>
25 #include <tmmintrin.h>
26 #include <wmmintrin.h>
27 #include <xmmintrin.h>
28 #ifdef _MSC_VER
29 #include <intrin.h>
30 #endif
31 #endif
32 
33 #include <ciso646>
34 #include <cstddef>
35 #include <cstring>
36 
37 namespace daw::json {
38  inline namespace DAW_JSON_VER {
39  namespace json_details {
40  DAW_ATTRIB_FLATINLINE inline constexpr bool
41  is_escaped( char const *ptr, char const *min_ptr ) {
42  if( *( ptr - 1 ) != '\\' ) {
43  return false;
44  }
45  if( ( ptr - min_ptr ) < 2 ) {
46  return false;
47  }
48  return *( ptr - 2 ) != '\\';
49  }
50 
51  inline std::ptrdiff_t find_lsb_set( runtime_exec_tag, UInt32 value ) {
52 #if DAW_HAS_BUILTIN( __builtin_ffs )
53  return __builtin_ffs( static_cast<int>( value ) ) - 1;
54 #elif defined( _MSC_VER )
55  unsigned long index;
56  _BitScanForward( &index, static_cast<int>( value ) );
57  return static_cast<std::ptrdiff_t>( index );
58 #else
59  (void)value;
60  DAW_UNREACHABLE( );
61 #endif
62  }
63 
64 #if defined( DAW_ALLOW_SSE42 )
65  DAW_ATTRIB_FLATINLINE inline __m128i
66  set_reverse( char c0, char c1 = 0, char c2 = 0, char c3 = 0, char c4 = 0,
67  char c5 = 0, char c6 = 0, char c7 = 0, char c8 = 0,
68  char c9 = 0, char c10 = 0, char c11 = 0, char c12 = 0,
69  char c13 = 0, char c14 = 0, char c15 = 0 ) {
70  return _mm_set_epi8( c15, c14, c13, c12, c11, c10, c9, c8, c7, c6, c5,
71  c4, c3, c2, c1, c0 );
72  }
73 
74  DAW_ATTRIB_FLATINLINE inline __m128i
75  uload16_char_data( sse42_exec_tag, char const *ptr ) {
76  return _mm_loadu_si128( reinterpret_cast<__m128i const *>( ptr ) );
77  }
78 
79  DAW_ATTRIB_FLATINLINE inline __m128i load16_char_data( sse42_exec_tag,
80  char const *ptr ) {
81  return _mm_load_si128( reinterpret_cast<__m128i const *>( ptr ) );
82  }
83 
84  template<char k>
85  DAW_ATTRIB_FLATINLINE inline UInt32 mem_find_eq( sse42_exec_tag,
86  __m128i block ) {
87  __m128i const keys = _mm_set1_epi8( k );
88  __m128i const found = _mm_cmpeq_epi8( block, keys );
89  return to_uint32( _mm_movemask_epi8( found ) );
90  }
91 
92  template<unsigned char k>
93  DAW_ATTRIB_FLATINLINE inline UInt32 mem_find_gt( sse42_exec_tag,
94  __m128i block ) {
95  static __m128i const keys = _mm_set1_epi8( k );
96  __m128i const found = _mm_cmpgt_epi8( block, keys );
97  return to_uint32( _mm_movemask_epi8( found ) );
98  }
99 
100  template<bool is_unchecked_input, char... keys, typename CharT>
101  DAW_ATTRIB_FLATINLINE inline CharT *
102  mem_move_to_next_of( sse42_exec_tag tag, CharT *first,
103  CharT *const last ) {
104 
105  while( last - first >= 16 ) {
106  auto const val0 = uload16_char_data( tag, first );
107  auto const key_positions = ( mem_find_eq<keys>( tag, val0 ) | ... );
108  if( key_positions != 0 ) {
109  return first + find_lsb_set( tag, key_positions );
110  }
111  first += 16;
112  }
113  __m128i val1{ };
114  auto const max_pos = last - first;
115  memcpy( &val1, first, static_cast<std::size_t>( max_pos ) );
116  auto const key_positions = ( mem_find_eq<keys>( tag, val1 ) | ... );
117  if( key_positions != 0 ) {
118  auto const offset = find_lsb_set( tag, key_positions );
119  if( offset >= max_pos ) {
120  return last;
121  }
122  return first + offset;
123  }
124  return last;
125  }
126 
127  template<bool is_unchecked_input, char... keys, typename CharT>
128  DAW_ATTRIB_FLATINLINE inline CharT *
129  mem_move_to_next_not_of( sse42_exec_tag tag, CharT *first, CharT *last ) {
130  static constexpr int keys_len = static_cast<int>( sizeof...( keys ) );
131  static_assert( keys_len <= 16 );
132  __m128i const a = set_reverse( keys... );
133  static constexpr int compare_mode =
134  _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_NEGATIVE_POLARITY;
135 
136  while( last - first >= 16 ) {
137  auto const b = uload16_char_data( tag, first );
138  int const result = _mm_cmpestri( a, keys_len, b, 16, compare_mode );
139  first += result;
140  if( result < 16 ) {
141  return first;
142  }
143  }
144  __m128i b{ };
145  auto const max_pos = last - first;
146  int const result = _mm_cmpestri( a, keys_len, b, 16, compare_mode );
147  if( result < max_pos ) {
148  return first + result;
149  }
150  return last;
151  }
152 
153  template<typename U32>
154  DAW_ATTRIB_FLATINLINE inline bool add_overflow( U32 value1, U32 value2,
155  U32 &result ) {
156  static_assert( sizeof( U32 ) <= sizeof( unsigned long long ) );
157  static_assert( sizeof( U32 ) == 4 );
158 #if DAW_HAS_BUILTIN( __builtin_uadd_overflow ) and \
159  DAW_HAS_BUILTIN( __builtin_uaddl_overflow ) and \
160  DAW_HAS_BUILTIN( __builtin_uaddll_overflow )
161  if constexpr( sizeof( unsigned ) == sizeof( U32 ) ) {
162  return __builtin_uadd_overflow(
163  static_cast<unsigned>( value1 ), static_cast<unsigned>( value2 ),
164  reinterpret_cast<unsigned *>( &result ) );
165  } else if constexpr( sizeof( unsigned long ) == sizeof( U32 ) ) {
166  return __builtin_uaddl_overflow(
167  static_cast<unsigned long>( value1 ),
168  static_cast<unsigned long>( value2 ),
169  reinterpret_cast<unsigned long *>( &result ) );
170  } else {
171  return __builtin_uaddll_overflow(
172  static_cast<unsigned long long>( value1 ),
173  static_cast<unsigned long long>( value2 ),
174  reinterpret_cast<unsigned long long *>( &result ) );
175  }
176 #else
177  return _addcarry_u32( 0, static_cast<std::uint32_t>( value1 ),
178  static_cast<std::uint32_t>( value2 ),
179  reinterpret_cast<std::uint32_t *>( &result ) );
180 #endif
181  }
182 
183  // Adapted from
184  // https://github.com/simdjson/simdjson/blob/master/src/generic/stage1/json_string_scanner.h#L79
185  DAW_ATTRIB_FLATINLINE inline constexpr UInt32
186  find_escaped_branchless( constexpr_exec_tag, UInt32 &prev_escaped,
187  UInt32 backslashes ) {
188  backslashes &= ~prev_escaped;
189  UInt32 follow_escape = ( backslashes << 1 ) | prev_escaped;
190  constexpr UInt32 even_bits = 0x5555'5555_u32;
191 
192  UInt32 const odd_seq_start =
193  backslashes & ( ~even_bits ) & ( ~follow_escape );
194  UInt32 seq_start_on_even_bits = 0_u32;
195  prev_escaped = [&] {
196  auto r = odd_seq_start + backslashes;
197  seq_start_on_even_bits = 0x0000'FFFF_u32 & r;
198  r >>= 16U;
199  return r;
200  }( );
201  UInt32 invert_mask = seq_start_on_even_bits << 1U;
202 
203  return ( even_bits ^ invert_mask ) & follow_escape;
204  }
205 
206  DAW_ATTRIB_FLATINLINE inline UInt32 prefix_xor( sse42_exec_tag,
207  UInt32 bitmask ) {
208  __m128i const all_ones = _mm_set1_epi8( '\xFF' );
209  __m128i const result = _mm_clmulepi64_si128(
210  _mm_set_epi32( 0, 0, 0, static_cast<std::int32_t>( bitmask ) ),
211  all_ones, 0 );
212  return to_uint32( _mm_cvtsi128_si32( result ) );
213  }
214 
215  template<bool is_unchecked_input, typename CharT>
216  inline CharT *mem_skip_until_end_of_string( simd_exec_tag tag,
217  CharT *first,
218  CharT *const last ) {
219  UInt32 prev_escapes = 0_u32;
220  while( last - first >= 16 ) {
221  auto const val0 = uload16_char_data( tag, first );
222  UInt32 const backslashes = mem_find_eq<'\\'>( tag, val0 );
223  UInt32 const escaped =
224  find_escaped_branchless( tag, prev_escapes, backslashes );
225  UInt32 const quotes = mem_find_eq<'"'>( tag, val0 ) & ( ~escaped );
226  UInt32 const in_string = prefix_xor( tag, quotes );
227  if( in_string != 0 ) {
228  first += find_lsb_set( tag, in_string );
229  return first;
230  }
231  first += 16;
232  }
233  if constexpr( is_unchecked_input ) {
234  while( *first != '"' ) {
235  while( not key_table<'"', '\\'>[*first] ) {
236  ++first;
237  }
238  if( *first == '"' ) {
239  return first;
240  }
241  first += 2;
242  }
243  } else {
244  while( DAW_LIKELY( first < last ) and *first != '"' ) {
245  while( DAW_LIKELY( first < last ) and
246  not key_table<'"', '\\'>[*first] ) {
247  ++first;
248  }
249  if( first >= last ) {
250  return last;
251  }
252  if( *first == '"' ) {
253  return first;
254  }
255  first += 2;
256  }
257  }
258  return ( is_unchecked_input or DAW_LIKELY( first < last ) ) ? first
259  : last;
260  }
261 
262  template<bool is_unchecked_input, typename CharT>
263  inline CharT *
264  mem_skip_until_end_of_string( simd_exec_tag tag, CharT *first,
265  CharT *const last,
266  std::ptrdiff_t &first_escape ) {
267  CharT *const first_first = first;
268  UInt32 prev_escapes = 0_u32;
269  while( last - first >= 16 ) {
270  auto const val0 = uload16_char_data( tag, first );
271  UInt32 const backslashes = mem_find_eq<'\\'>( tag, val0 );
272  if( ( backslashes != 0 ) & ( first_escape < 0 ) ) {
273  first_escape = find_lsb_set( tag, backslashes );
274  }
275  UInt32 const escaped =
276  find_escaped_branchless( tag, prev_escapes, backslashes );
277  UInt32 const quotes = mem_find_eq<'"'>( tag, val0 ) & ( ~escaped );
278  UInt32 const in_string = prefix_xor( tag, quotes );
279  if( in_string != 0 ) {
280  first += find_lsb_set( tag, in_string );
281  return first;
282  }
283  first += 16;
284  }
285  if constexpr( is_unchecked_input ) {
286  while( *first != '"' ) {
287  while( not key_table<'"', '\\'>[*first] ) {
288  ++first;
289  }
290  if( *first == '"' ) {
291  return first;
292  }
293  if( first_escape < 0 ) {
294  first_escape = first_first - first;
295  }
296  first += 2;
297  }
298  } else {
299  while( DAW_LIKELY( first < last ) and *first != '"' ) {
300  while( DAW_LIKELY( first < last ) and
301  not key_table<'"', '\\'>[*first] ) {
302  ++first;
303  }
304  if( first >= last ) {
305  return last;
306  }
307  if( *first == '"' ) {
308  return first;
309  }
310  if( first_escape < 0 ) {
311  first_escape = first_first - first;
312  }
313  first += 2;
314  }
315  }
316  return ( is_unchecked_input or DAW_LIKELY( first < last ) ) ? first
317  : last;
318  }
319 
320 #endif
321  template<bool is_unchecked_input, char... keys, typename CharT>
322  DAW_ATTRIB_FLATINLINE inline CharT *
323  mem_move_to_next_of( runtime_exec_tag, CharT *first, CharT *last ) {
324  if constexpr( sizeof...( keys ) == 1 ) {
325  char const key[]{ keys... };
326  auto *ptr = reinterpret_cast<CharT *>( std::memchr(
327  first, key[0], static_cast<std::size_t>( last - first ) ) );
328  if( ptr == nullptr ) {
329  ptr = last;
330  }
331  return ptr;
332  } else {
333  constexpr auto eq = []( char l, char r ) { return l == r; };
334  while( is_unchecked_input or first < last ) {
335  char const c = *first;
336  if( ( eq( c, keys ) | ... ) ) {
337  return first;
338  }
339  ++first;
340  }
341  return first;
342  }
343  }
344 
345  template<
346  bool is_unchecked_input, typename ExecTag, typename CharT,
347  std::enable_if_t<std::is_base_of<runtime_exec_tag, ExecTag>::value,
348  std::nullptr_t> = nullptr>
349  DAW_ATTRIB_FLATINLINE inline CharT *
350  mem_skip_string( ExecTag const &tag, CharT *first, CharT *const last ) {
351  return mem_move_to_next_of<is_unchecked_input, '"', '\\'>( tag, first,
352  last );
353  }
354 
355  template<
356  bool is_unchecked_input, typename ExecTag, typename CharT,
357  std::enable_if_t<std::is_base_of<runtime_exec_tag, ExecTag>::value,
358  std::nullptr_t> = nullptr>
359  DAW_ATTRIB_FLATINLINE inline CharT *
360  mem_skip_until_end_of_string( ExecTag const &tag, CharT *first,
361  CharT *const last ) {
362  if constexpr( not is_unchecked_input ) {
363  daw_json_assert( first < last, ErrorReason::UnexpectedEndOfData );
364  }
365  first = mem_move_to_next_of<is_unchecked_input, '\\', '"'>( tag, first,
366  last );
367  while( is_unchecked_input or first < last ) {
368  switch( *first ) {
369  case '"':
370  return first;
371  case '\\':
372  if constexpr( is_unchecked_input ) {
373  ++first;
374  } else {
375  first += static_cast<int>( static_cast<bool>( last - first ) );
376  }
377  break;
378  }
379  ++first;
380  first = mem_move_to_next_of<is_unchecked_input, '\\', '"'>(
381  tag, first, last );
382  }
383  return first;
384  }
385 
386  template<bool is_unchecked_input, typename CharT>
387  DAW_ATTRIB_FLATINLINE inline CharT *
389  CharT *const last,
390  std::ptrdiff_t &first_escape ) {
391  CharT *const first_first = first;
392  if constexpr( not is_unchecked_input ) {
393  daw_json_assert( first < last, ErrorReason::UnexpectedEndOfData );
394  }
395  first = mem_move_to_next_of<is_unchecked_input, '\\', '"'>( tag, first,
396  last );
397  while( is_unchecked_input or first < last ) {
398  switch( *first ) {
399  case '"':
400  return first;
401  case '\\':
402  if( first_escape < 0 ) {
403  first_escape = first_first - first;
404  }
405  if constexpr( is_unchecked_input ) {
406  ++first;
407  } else {
408  first += static_cast<int>( static_cast<bool>( last - first ) );
409  }
410  break;
411  }
412  ++first;
413  first = mem_move_to_next_of<is_unchecked_input, '\\', '"'>(
414  tag, first, last );
415  }
416  return first;
417  }
418  } // namespace json_details
419  } // namespace DAW_JSON_VER
420 } // namespace daw::json
#define daw_json_assert(Bool,...)
Definition: daw_json_assert.h:178
DAW_ATTRIB_FLATINLINE CharT * mem_move_to_next_of(runtime_exec_tag, CharT *first, CharT *last)
Definition: daw_not_const_ex_functions.h:323
std::ptrdiff_t find_lsb_set(runtime_exec_tag, UInt32 value)
Definition: daw_not_const_ex_functions.h:51
DAW_ATTRIB_FLATINLINE CharT * mem_skip_string(ExecTag const &tag, CharT *first, CharT *const last)
Definition: daw_not_const_ex_functions.h:350
DAW_ATTRIB_FLATINLINE CharT * mem_skip_until_end_of_string(ExecTag const &tag, CharT *first, CharT *const last)
Definition: daw_not_const_ex_functions.h:360
constexpr DAW_ATTRIB_FLATINLINE bool is_escaped(char const *ptr, char const *min_ptr)
Definition: daw_not_const_ex_functions.h:41
Definition: daw_from_json.h:22
Definition: daw_json_exec_modes.h:28
#define DAW_JSON_VER
Definition: version.h:11