DAW JSON Link
daw_json_parse_std_string.h
Go to the documentation of this file.
1 // Copyright (c) Darrell Wright
2 //
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5 //
6 // Official repository: https://github.com/beached/daw_json_link
7 //
8 
9 #pragma once
10 
11 #include "version.h"
12 
13 #include "daw_json_assert.h"
14 #include "daw_json_parse_common.h"
16 
17 #include <daw/daw_likely.h>
18 
19 #include <ciso646>
20 #include <string>
21 #include <type_traits>
22 
23 namespace daw::json {
24  inline namespace DAW_JSON_VER {
25  namespace json_details {
26  [[nodiscard]] inline constexpr UInt8 to_nibble( unsigned char chr ) {
27  int const b = static_cast<int>( chr );
28  int const maskLetter = ( ( '9' - b ) >> 31 );
29  int const maskSmall = ( ( 'Z' - b ) >> 31 );
30  int const offset = '0' + ( maskLetter & int( 'A' - '0' - 10 ) ) +
31  ( maskSmall & int( 'a' - 'A' ) );
32  auto const result = static_cast<unsigned>( b - offset );
33  return to_uint8( result );
34  }
35 
36  template<bool is_unchecked_input>
37  [[nodiscard]] inline constexpr UInt16
38  byte_from_nibbles( char const *&first ) {
39  auto const n0 = to_nibble( static_cast<unsigned char>( *first++ ) );
40  auto const n1 = to_nibble( static_cast<unsigned char>( *first++ ) );
41  if constexpr( is_unchecked_input ) {
42  daw_json_assert( n0 < 16 and n1 < 16, ErrorReason::InvalidUTFEscape );
43  }
44  return to_uint16( ( n0 << 4U ) | n1 );
45  }
46 
47  constexpr char u32toC( UInt32 value ) {
48  return static_cast<char>( static_cast<unsigned char>( value ) );
49  }
50 
51  template<typename ParseState>
52  [[nodiscard]] static constexpr char *
53  decode_utf16( ParseState &parse_state, char *it ) {
54  constexpr bool is_unchecked_input = ParseState::is_unchecked_input;
55  char const *first = parse_state.first;
56  ++first;
57  UInt32 cp = to_uint32( byte_from_nibbles<is_unchecked_input>( first ) )
58  << 8U;
59  cp |= byte_from_nibbles<is_unchecked_input>( first );
60  if( cp <= 0x7FU ) {
61  *it++ = static_cast<char>( static_cast<unsigned char>( cp ) );
62  parse_state.first = first;
63  return it;
64  }
65 
66  //******************************
67  if( 0xD800U <= cp and cp <= 0xDBFFU ) {
68  cp = ( cp - 0xD800U ) * 0x400U;
69  ++first;
71  *first == 'u', ErrorReason::InvalidUTFEscape,
72  parse_state ); // Expected parse_state to start with a \\u
73  ++first;
74  auto trailing =
75  to_uint32( byte_from_nibbles<is_unchecked_input>( first ) ) << 8U;
76  trailing |= byte_from_nibbles<is_unchecked_input>( first );
77  trailing -= 0xDC00U;
78  cp += trailing;
79  cp += 0x10000;
80  }
81  // UTF32-> UTF8
82  if( cp >= 0x10000U ) {
83  // 4 bytes
84  char const enc3 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
85  char const enc2 =
86  u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
87  char const enc1 =
88  u32toC( ( ( cp >> 12U ) & 0b0011'1111U ) | 0b1000'0000U );
89  char const enc0 = u32toC( ( cp >> 18U ) | 0b1111'0000U );
90  *it++ = enc0;
91  *it++ = enc1;
92  *it++ = enc2;
93  *it++ = enc3;
94  parse_state.first = first;
95  return it;
96  }
97  //******************************
98  if( cp >= 0x800U ) {
99  // 3 bytes
100  char const enc2 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
101  char const enc1 =
102  u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
103  char const enc0 = u32toC( ( cp >> 12U ) | 0b1110'0000U );
104  *it++ = enc0;
105  *it++ = enc1;
106  *it++ = enc2;
107  parse_state.first = first;
108  return it;
109  }
110  //******************************
111  // cp >= 0x80U
112  // 2 bytes
113  char const enc1 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
114  char const enc0 = u32toC( ( cp >> 6U ) | 0b1100'0000U );
115  *it++ = enc0;
116  *it++ = enc1;
117  parse_state.first = first;
118  return it;
119  }
120 
121  template<typename ParseState, typename Appender>
122  static constexpr void decode_utf16( ParseState &parse_state,
123  Appender &app ) {
124  constexpr bool is_unchecked_input = ParseState::is_unchecked_input;
125  char const *first = parse_state.first;
126  ++first;
127  UInt32 cp = to_uint32( byte_from_nibbles<is_unchecked_input>( first ) )
128  << 8U;
129  cp |= byte_from_nibbles<is_unchecked_input>( first );
130  if( cp <= 0x7FU ) {
131  app( u32toC( cp ) );
132  parse_state.first = first;
133  return;
134  }
135  if( 0xD800U <= cp and cp <= 0xDBFFU ) {
136  cp = ( cp - 0xD800U ) * 0x400U;
137  ++first;
138  daw_json_assert_weak( *first == 'u', ErrorReason::InvalidUTFEscape,
139  parse_state );
140  ++first;
141  auto trailing =
142  to_uint32( byte_from_nibbles<is_unchecked_input>( first ) ) << 8U;
143  trailing |= byte_from_nibbles<is_unchecked_input>( first );
144  trailing -= 0xDC00U;
145  cp += trailing;
146  cp += 0x10000;
147  }
148  // UTF32-> UTF8
149  if( cp >= 0x10000U ) {
150  // 4 bytes
151  char const enc3 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
152  char const enc2 =
153  u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
154  char const enc1 =
155  u32toC( ( ( cp >> 12U ) & 0b0011'1111U ) | 0b1000'0000U );
156  char const enc0 = u32toC( ( cp >> 18U ) | 0b1111'0000U );
157  app( enc0 );
158  app( enc1 );
159  app( enc2 );
160  app( enc3 );
161  parse_state.first = first;
162  return;
163  }
164  if( cp >= 0x800U ) {
165  // 3 bytes
166  char const enc2 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
167  char const enc1 =
168  u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
169  char const enc0 = u32toC( ( cp >> 12U ) | 0b1110'0000U );
170  app( enc0 );
171  app( enc1 );
172  app( enc2 );
173  parse_state.first = first;
174  return;
175  }
176  // cp >= 0x80U
177  // 2 bytes
178  char const enc1 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
179  char const enc0 = u32toC( ( cp >> 6U ) | 0b1100'0000U );
180  app( enc0 );
181  app( enc1 );
182  parse_state.first = first;
183  }
184 
namespace parse_tokens {
	// The two characters that end a plain run inside a JSON string:
	// backslash and double quote, followed by the terminating NUL.
	inline constexpr char const escape_quotes[] = { '\\', '"', '\0' };
} // namespace parse_tokens
188 
189  // Fast path for parsing escaped strings to a std::string with the default
190  // appender
191  template<bool AllowHighEight, typename JsonMember, bool KnownBounds,
192  typename ParseState>
193  [[nodiscard, maybe_unused]] constexpr auto // json_result<JsonMember>
195  using string_type = json_base_type<JsonMember>;
196  string_type result =
197  string_type( std::size( parse_state ), '\0',
198  parse_state.get_allocator_for( template_arg<char> ) );
199  char *it = std::data( result );
200 
201  bool const has_quote = parse_state.front( ) == '"';
202  if( has_quote ) {
203  parse_state.remove_prefix( );
204  }
205 
206  if( auto const first_slash =
207  static_cast<std::ptrdiff_t>( parse_state.counter ) - 1;
208  first_slash > 1 ) {
209  it = std::copy_n( parse_state.first, first_slash, it );
210  parse_state.first += first_slash;
211  }
212  constexpr auto pred = []( auto const &r ) {
213  if constexpr( ParseState::is_unchecked_input ) {
214  return DAW_LIKELY( r.front( ) != '"' );
215  } else {
216  return DAW_LIKELY( r.has_more( ) ) & ( r.front( ) != '"' );
217  }
218  };
219 
220  while( pred( parse_state ) ) {
221  {
222  char const *first = parse_state.first;
223  char const *const last = parse_state.last;
224  if constexpr( std::is_same<typename ParseState::exec_tag_t,
225  constexpr_exec_tag>::value ) {
226 
227  daw_json_assert_weak( KnownBounds or first < last,
228  ErrorReason::UnexpectedEndOfData,
229  parse_state );
230  while( *first != '"' and *first != '\\' ) {
231  ++first;
232  daw_json_assert_weak( KnownBounds or first < last,
233  ErrorReason::UnexpectedEndOfData,
234  parse_state );
235  }
236  } else {
237  first =
238  mem_move_to_next_of<( ParseState::is_unchecked_input or
239  ParseState::is_zero_terminated_string ),
240  '"', '\\'>( ParseState::exec_tag, first,
241  last );
242  }
243  it = daw::algorithm::copy( parse_state.first, first, it );
244  parse_state.first = first;
245  }
246  if( parse_state.front( ) == '\\' ) {
247  parse_state.remove_prefix( );
248  daw_json_assert_weak( not parse_state.is_space_unchecked( ),
249  ErrorReason::InvalidUTFCodepoint,
250  parse_state );
251  switch( parse_state.front( ) ) {
252  case 'b':
253  *it++ = '\b';
254  parse_state.remove_prefix( );
255  break;
256  case 'f':
257  *it++ = '\f';
258  parse_state.remove_prefix( );
259  break;
260  case 'n':
261  *it++ = '\n';
262  parse_state.remove_prefix( );
263  break;
264  case 'r':
265  *it++ = '\r';
266  parse_state.remove_prefix( );
267  break;
268  case 't':
269  *it++ = '\t';
270  parse_state.remove_prefix( );
271  break;
272  case 'u':
274  break;
275  case '/':
276  case '\\':
277  case '"':
278  *it++ = parse_state.front( );
279  parse_state.remove_prefix( );
280  break;
281  default:
282  if constexpr( not AllowHighEight ) {
284  ( not parse_state.is_space_unchecked( ) ) &
285  ( static_cast<unsigned char>( parse_state.front( ) ) <=
286  0x7FU ),
287  ErrorReason::InvalidStringHighASCII, parse_state );
288  }
289  *it++ = parse_state.front( );
290  parse_state.remove_prefix( );
291  }
292  } else {
293  daw_json_assert_weak( not has_quote or
294  parse_state.is_quotes_checked( ),
295  ErrorReason::InvalidString, parse_state );
296  }
297  daw_json_assert_weak( not has_quote or parse_state.has_more( ),
298  ErrorReason::UnexpectedEndOfData, parse_state );
299  }
300  auto const sz =
301  static_cast<std::size_t>( std::distance( std::data( result ), it ) );
302  daw_json_assert_weak( std::size( result ) >= sz,
303  ErrorReason::InvalidString, parse_state );
304  result.resize( sz );
305  if constexpr( std::is_convertible<string_type,
306  json_result<JsonMember>>::value ) {
307  return result;
308  } else {
309  using constructor_t = typename JsonMember::constructor_t;
311  template_args<json_result<JsonMember>, constructor_t>, parse_state,
312  std::data( result ), daw::data_end( result ) );
313  }
314  }
315  } // namespace json_details
316  } // namespace DAW_JSON_VER
317 } // namespace daw::json
#define daw_json_assert_weak(Bool,...)
Definition: daw_json_assert.h:189
#define daw_json_assert(Bool,...)
Definition: daw_json_assert.h:178
ParseState & parse_state
Definition: daw_json_parse_class.h:201
Iterator & it
Definition: daw_json_traits.h:231
constexpr char const escape_quotes[]
Definition: daw_json_parse_std_string.h:186
DAW_ATTRIB_FLATINLINE CharT * mem_move_to_next_of(runtime_exec_tag, CharT *first, CharT *last)
Definition: daw_not_const_ex_functions.h:323
constexpr auto parse_string_known_stdstring(ParseState &parse_state)
Definition: daw_json_parse_std_string.h:194
static constexpr char * decode_utf16(ParseState &parse_state, char *it)
Definition: daw_json_parse_std_string.h:53
constexpr UInt16 byte_from_nibbles(char const *&first)
Definition: daw_json_parse_std_string.h:38
constexpr UInt8 to_nibble(unsigned char chr)
Definition: daw_json_parse_std_string.h:26
static constexpr DAW_ATTRIB_FLATINLINE auto construct_value(template_params< Value, Constructor >, ParseState &parse_state, Args &&...args)
Definition: daw_json_parse_common.h:63
typename JsonMember::parse_to_t json_result
Definition: daw_json_parse_common.h:205
constexpr char u32toC(UInt32 value)
Definition: daw_json_parse_std_string.h:47
typename JsonMember::base_type json_base_type
Definition: daw_json_parse_common.h:208
Definition: daw_from_json.h:22
Definition: daw_json_exec_modes.h:19
#define DAW_JSON_VER
Definition: version.h:11