DAW JSON Link
daw_json_parse_std_string.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include "daw_json_assert.h"
5 
6 #include <ciso646>
7 #include <string>
8 #include <type_traits>
9 
10 namespace daw::json::json_details {
11  [[nodiscard]] inline constexpr UInt8 to_nibble( unsigned char chr ) {
12  int const b = static_cast<int>( chr );
13  int const maskLetter = ( ( '9' - b ) >> 31 );
14  int const maskSmall = ( ( 'Z' - b ) >> 31 );
15  int const offset = '0' + ( maskLetter & int( 'A' - '0' - 10 ) ) +
16  ( maskSmall & int( 'a' - 'A' ) );
17  auto const result = static_cast<unsigned>( b - offset );
18  return to_uint8( result );
19  }
20 
21  template<bool is_unchecked_input>
22  [[nodiscard]] inline constexpr UInt16
23  byte_from_nibbles( char const *&first ) {
24  auto const n0 = to_nibble( static_cast<unsigned char>( *first++ ) );
25  auto const n1 = to_nibble( static_cast<unsigned char>( *first++ ) );
26  if constexpr( is_unchecked_input ) {
27  daw_json_assert( n0 < 16 and n1 < 16, ErrorReason::InvalidUTFEscape );
28  }
29  return to_uint16( ( n0 << 4U ) | n1 );
30  }
31 
32  constexpr char u32toC( UInt32 value ) {
33  return static_cast<char>( static_cast<unsigned char>( value ) );
34  }
35 
36  template<typename Range>
37  [[nodiscard]] static constexpr char *decode_utf16( Range &rng, char *it ) {
38  constexpr bool is_unchecked_input = Range::is_unchecked_input;
39  char const *first = rng.first;
40  ++first;
41  UInt32 cp = to_uint32( byte_from_nibbles<is_unchecked_input>( first ) )
42  << 8U;
43  cp |= byte_from_nibbles<is_unchecked_input>( first );
44  if( cp <= 0x7FU ) {
45  *it++ = static_cast<char>( static_cast<unsigned char>( cp ) );
46  rng.first = first;
47  return it;
48  }
49 
50  //******************************
51  if( 0xD800U <= cp and cp <= 0xDBFFU ) {
52  cp = ( cp - 0xD800U ) * 0x400U;
53  ++first;
54  daw_json_assert_weak( *first == 'u', ErrorReason::InvalidUTFEscape,
55  rng ); // Expected rng to start with a \\u
56  ++first;
57  auto trailing =
58  to_uint32( byte_from_nibbles<is_unchecked_input>( first ) ) << 8U;
59  trailing |= byte_from_nibbles<is_unchecked_input>( first );
60  trailing -= 0xDC00U;
61  cp += trailing;
62  cp += 0x10000;
63  }
64  // UTF32-> UTF8
65  if( cp >= 0x10000U ) {
66  // 4 bytes
67  char const enc3 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
68  char const enc2 =
69  u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
70  char const enc1 =
71  u32toC( ( ( cp >> 12U ) & 0b0011'1111U ) | 0b1000'0000U );
72  char const enc0 = u32toC( ( cp >> 18U ) | 0b1111'0000U );
73  *it++ = enc0;
74  *it++ = enc1;
75  *it++ = enc2;
76  *it++ = enc3;
77  rng.first = first;
78  return it;
79  }
80  //******************************
81  if( cp >= 0x800U ) {
82  // 3 bytes
83  char const enc2 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
84  char const enc1 =
85  u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
86  char const enc0 = u32toC( ( cp >> 12U ) | 0b1110'0000U );
87  *it++ = enc0;
88  *it++ = enc1;
89  *it++ = enc2;
90  rng.first = first;
91  return it;
92  }
93  //******************************
94  // cp >= 0x80U
95  // 2 bytes
96  char const enc1 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
97  char const enc0 = u32toC( ( cp >> 6U ) | 0b1100'0000U );
98  *it++ = enc0;
99  *it++ = enc1;
100  rng.first = first;
101  return it;
102  }
103 
104  template<typename Range, typename Appender>
105  static constexpr void decode_utf16( Range &rng, Appender &app ) {
106  constexpr bool is_unchecked_input = Range::is_unchecked_input;
107  char const *first = rng.first;
108  ++first;
109  UInt32 cp = to_uint32( byte_from_nibbles<is_unchecked_input>( first ) )
110  << 8U;
111  cp |= byte_from_nibbles<is_unchecked_input>( first );
112  if( cp <= 0x7FU ) {
113  app( u32toC( cp ) );
114  rng.first = first;
115  return;
116  }
117  if( 0xD800U <= cp and cp <= 0xDBFFU ) {
118  cp = ( cp - 0xD800U ) * 0x400U;
119  ++first;
120  daw_json_assert_weak( *first == 'u', ErrorReason::InvalidUTFEscape, rng );
121  ++first;
122  auto trailing =
123  to_uint32( byte_from_nibbles<is_unchecked_input>( first ) ) << 8U;
124  trailing |= byte_from_nibbles<is_unchecked_input>( first );
125  trailing -= 0xDC00U;
126  cp += trailing;
127  cp += 0x10000;
128  }
129  // UTF32-> UTF8
130  if( cp >= 0x10000U ) {
131  // 4 bytes
132  char const enc3 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
133  char const enc2 =
134  u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
135  char const enc1 =
136  u32toC( ( ( cp >> 12U ) & 0b0011'1111U ) | 0b1000'0000U );
137  char const enc0 = u32toC( ( cp >> 18U ) | 0b1111'0000U );
138  app( enc0 );
139  app( enc1 );
140  app( enc2 );
141  app( enc3 );
142  rng.first = first;
143  return;
144  }
145  if( cp >= 0x800U ) {
146  // 3 bytes
147  char const enc2 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
148  char const enc1 =
149  u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
150  char const enc0 = u32toC( ( cp >> 12U ) | 0b1110'0000U );
151  app( enc0 );
152  app( enc1 );
153  app( enc2 );
154  rng.first = first;
155  return;
156  }
157  // cp >= 0x80U
158  // 2 bytes
159  char const enc1 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
160  char const enc0 = u32toC( ( cp >> 6U ) | 0b1100'0000U );
161  app( enc0 );
162  app( enc1 );
163  rng.first = first;
164  }
165 
namespace parse_tokens {
  // A backslash followed by a double quote, stored with a terminating NUL
  inline constexpr char const escape_quotes[] = { '\\', '"', '\0' };
} // namespace parse_tokens
169 
170  // Fast path for parsing escaped strings to a std::string with the default
171  // appender
172  template<bool AllowHighEight, typename JsonMember, bool KnownBounds,
173  typename Range>
174  [[nodiscard, maybe_unused]] constexpr json_result<JsonMember>
175  parse_string_known_stdstring( Range &rng ) {
176  using string_type =
177  std::basic_string<char, std::char_traits<char>,
178  typename Range::template allocator_type_as<char>>;
179  string_type result =
180  string_type( rng.size( ), '\0', rng.template get_allocator_for<char>( ) );
181 
182  char *it = result.data( );
183 
184  bool const has_quote = rng.front( ) == '"';
185  if( has_quote ) {
186  rng.remove_prefix( );
187  }
188 
189  if( auto const first_slash = static_cast<std::ptrdiff_t>( rng.counter ) - 1;
190  first_slash > 1 ) {
191  it = std::copy_n( rng.first, first_slash, it );
192  rng.first += first_slash;
193  }
194  while(
195  ( Range::is_unchecked_input or DAW_JSON_LIKELY( rng.has_more( ) ) ) and
196  rng.front( ) != '"' ) {
197  {
198  char const *first = rng.first;
199  char const *const last = rng.last;
200  if constexpr( std::is_same_v<typename Range::exec_tag_t,
201  constexpr_exec_tag> ) {
202  while( not key_table<'"', '\\'>[*first] ) {
203  ++first;
204  daw_json_assert_weak( KnownBounds or first < last,
205  ErrorReason::UnexpectedEndOfData, rng );
206  }
207  } else {
208  first = mem_move_to_next_of<Range::is_unchecked_input, '"', '\\'>(
209  Range::exec_tag, first, last );
210  }
211  it = std::copy( rng.first, first, it );
212  rng.first = first;
213  }
214  if( rng.front( ) == '\\' ) {
215  daw_json_assert_weak( not rng.is_space_unchecked( ),
216  ErrorReason::InvalidUTFCodepoint, rng );
217  rng.remove_prefix( );
218  switch( rng.front( ) ) {
219  case 'b':
220  *it++ = '\b';
221  rng.remove_prefix( );
222  break;
223  case 'f':
224  *it++ = '\f';
225  rng.remove_prefix( );
226  break;
227  case 'n':
228  *it++ = '\n';
229  rng.remove_prefix( );
230  break;
231  case 'r':
232  *it++ = '\r';
233  rng.remove_prefix( );
234  break;
235  case 't':
236  *it++ = '\t';
237  rng.remove_prefix( );
238  break;
239  case 'u':
240  it = decode_utf16( rng, it );
241  break;
242  case '/':
243  case '\\':
244  case '"':
245  *it++ = rng.front( );
246  rng.remove_prefix( );
247  break;
248  default:
249  if constexpr( not AllowHighEight ) {
251  ( not rng.is_space_unchecked( ) ) &
252  ( static_cast<unsigned char>( rng.front( ) ) <= 0x7FU ),
253  ErrorReason::InvalidStringHighASCII, rng );
254  }
255  *it++ = rng.front( );
256  rng.remove_prefix( );
257  }
258  } else {
259  daw_json_assert_weak( not has_quote or rng.is_quotes_checked( ),
260  ErrorReason::InvalidString, rng );
261  }
262  daw_json_assert_weak( not has_quote or rng.has_more( ),
263  ErrorReason::UnexpectedEndOfData, rng );
264  }
265  auto const sz =
266  static_cast<std::size_t>( std::distance( result.data( ), it ) );
267  daw_json_assert_weak( result.size( ) >= sz, ErrorReason::InvalidString,
268  rng );
269  result.resize( sz );
270  if constexpr( std::is_convertible_v<string_type,
271  json_result<JsonMember>> ) {
272  return result;
273  } else {
274  using constructor_t = typename JsonMember::constructor_t;
275  construct_value<json_result<JsonMember>>(
276  constructor_t{ }, rng, result.data( ),
277  result.data( ) + static_cast<std::ptrdiff_t>( result.size( ) ) );
278  }
279  }
280 } // namespace daw::json::json_details
daw_json_assert
#define daw_json_assert(Bool,...)
Definition: daw_json_assert.h:196
daw_json_assert_weak
#define daw_json_assert_weak(Bool,...)
Definition: daw_json_assert.h:206
daw_json_parse_common.h
daw_json_assert.h
DAW_JSON_LIKELY
#define DAW_JSON_LIKELY(Bool)
Definition: daw_json_assert.h:34