DAW JSON Link
daw_json_parse_std_string.h
Go to the documentation of this file.
1// Copyright (c) Darrell Wright
2//
3// Distributed under the Boost Software License, Version 1.0. (See accompanying
4// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5//
6// Official repository: https://github.com/beached/daw_json_link
7//
8
9#pragma once
10
11#include "version.h"
12
13#include "daw_json_assert.h"
16
17#include <daw/daw_likely.h>
18
19#include <ciso646>
20#include <string>
21#include <type_traits>
22
23namespace daw::json {
24 inline namespace DAW_JSON_VER {
25 namespace json_details {
26 [[nodiscard]] inline constexpr UInt8 to_nibble( unsigned char chr ) {
27 int const b = static_cast<int>( chr );
28 int const maskLetter = ( ( '9' - b ) >> 31 );
29 int const maskSmall = ( ( 'Z' - b ) >> 31 );
30 int const offset = '0' + ( maskLetter & int( 'A' - '0' - 10 ) ) +
31 ( maskSmall & int( 'a' - 'A' ) );
32 auto const result = static_cast<unsigned>( b - offset );
33 return to_uint8( result );
34 }
35
36 template<bool is_unchecked_input>
37 [[nodiscard]] inline constexpr UInt16
38 byte_from_nibbles( char const *&first ) {
39 auto const n0 = to_nibble( static_cast<unsigned char>( *first++ ) );
40 auto const n1 = to_nibble( static_cast<unsigned char>( *first++ ) );
41 if constexpr( is_unchecked_input ) {
42 daw_json_assert( n0 < 16 and n1 < 16, ErrorReason::InvalidUTFEscape );
43 }
44 return to_uint16( ( n0 << 4U ) | n1 );
45 }
46
47 constexpr char u32toC( UInt32 value ) {
48 return static_cast<char>( static_cast<unsigned char>( value ) );
49 }
50
// Decodes one \u escape (four hex digits, optionally followed by a second
// \u escape when the first value is a high surrogate) and writes the code
// point as UTF-8 through `it`.
//
// parse_state: the cursor parse_state.first is read on entry and is set to
//              the position just after the consumed digits on every return
//              path.
// it:          output pointer; 1 to 4 bytes are written.
// returns:     `it` advanced past the bytes written.
//
// NOTE(review): the byte at parse_state.first is skipped without being
// inspected - presumably it is the 'u' of the escape; confirm at the caller.
51 template<typename ParseState>
52 [[nodiscard]] static constexpr char *
53 decode_utf16( ParseState &parse_state, char *it ) {
54 constexpr bool is_unchecked_input = ParseState::is_unchecked_input;
// NOTE(review): listing line 55 is missing from this extraction. The line
// below is the tail of a call whose opening (presumably an assertion with a
// size condition) is not visible here.
56 ErrorReason::UnexpectedEndOfData, parse_state );
57 char const *first = parse_state.first;
58 ++first;
// Two byte_from_nibbles calls consume four hex digits: high byte first,
// then low byte.
59 UInt32 cp = to_uint32( byte_from_nibbles<is_unchecked_input>( first ) )
60 << 8U;
61 cp |= byte_from_nibbles<is_unchecked_input>( first );
// Values up to 0x7F are emitted as a single byte.
62 if( cp <= 0x7FU ) {
63 *it++ = static_cast<char>( static_cast<unsigned char>( cp ) );
64 parse_state.first = first;
65 return it;
66 }
67
68 //******************************
// High surrogate range: combine with the value of the following \u escape.
69 if( 0xD800U <= cp and cp <= 0xDBFFU ) {
70 cp = ( cp - 0xD800U ) * 0x400U;
// Skips one byte without inspecting it (presumably the backslash of the
// second escape - TODO confirm).
71 ++first;
// NOTE(review): listing line 72 is missing from this extraction. The next
// three lines are the arguments of a call whose opening is not visible;
// the condition requires at least 5 remaining bytes and a 'u' at `first`.
73 ( parse_state.last - first >= 5 ) and *first == 'u',
74 ErrorReason::InvalidUTFEscape,
75 parse_state ); // Expected parse_state to start with a \\u
76 ++first;
77 auto trailing =
78 to_uint32( byte_from_nibbles<is_unchecked_input>( first ) ) << 8U;
79 trailing |= byte_from_nibbles<is_unchecked_input>( first );
// No visible check that `trailing` lies in 0xDC00-0xDFFF before the
// subtraction.
80 trailing -= 0xDC00U;
81 cp += trailing;
82 cp += 0x10000;
83 }
84 // UTF32-> UTF8
85 if( cp >= 0x10000U ) {
86 // 4 bytes
// Lead byte carries the bits above bit 17; each following byte carries six
// bits under a 10xxxxxx prefix.
87 char const enc3 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
88 char const enc2 =
89 u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
90 char const enc1 =
91 u32toC( ( ( cp >> 12U ) & 0b0011'1111U ) | 0b1000'0000U );
92 char const enc0 = u32toC( ( cp >> 18U ) | 0b1111'0000U );
93 *it++ = enc0;
94 *it++ = enc1;
95 *it++ = enc2;
96 *it++ = enc3;
97 parse_state.first = first;
98 return it;
99 }
100 //******************************
101 if( cp >= 0x800U ) {
102 // 3 bytes
103 char const enc2 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
104 char const enc1 =
105 u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
106 char const enc0 = u32toC( ( cp >> 12U ) | 0b1110'0000U );
107 *it++ = enc0;
108 *it++ = enc1;
109 *it++ = enc2;
110 parse_state.first = first;
111 return it;
112 }
113 //******************************
114 // cp >= 0x80U
115 // 2 bytes
116 char const enc1 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
117 char const enc0 = u32toC( ( cp >> 6U ) | 0b1100'0000U );
118 *it++ = enc0;
119 *it++ = enc1;
120 parse_state.first = first;
121 return it;
122 }
123
124 template<typename ParseState, typename Appender>
125 static constexpr void decode_utf16( ParseState &parse_state,
126 Appender &app ) {
127 constexpr bool is_unchecked_input = ParseState::is_unchecked_input;
128 char const *first = parse_state.first;
129 ++first;
130 UInt32 cp = to_uint32( byte_from_nibbles<is_unchecked_input>( first ) )
131 << 8U;
132 cp |= byte_from_nibbles<is_unchecked_input>( first );
133 if( cp <= 0x7FU ) {
134 app( u32toC( cp ) );
135 parse_state.first = first;
136 return;
137 }
138 if( 0xD800U <= cp and cp <= 0xDBFFU ) {
139 cp = ( cp - 0xD800U ) * 0x400U;
140 ++first;
141 daw_json_assert_weak( *first == 'u', ErrorReason::InvalidUTFEscape,
142 parse_state );
143 ++first;
144 auto trailing =
145 to_uint32( byte_from_nibbles<is_unchecked_input>( first ) ) << 8U;
146 trailing |= byte_from_nibbles<is_unchecked_input>( first );
147 trailing -= 0xDC00U;
148 cp += trailing;
149 cp += 0x10000;
150 }
151 // UTF32-> UTF8
152 if( cp >= 0x10000U ) {
153 // 4 bytes
154 char const enc3 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
155 char const enc2 =
156 u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
157 char const enc1 =
158 u32toC( ( ( cp >> 12U ) & 0b0011'1111U ) | 0b1000'0000U );
159 char const enc0 = u32toC( ( cp >> 18U ) | 0b1111'0000U );
160 app( enc0 );
161 app( enc1 );
162 app( enc2 );
163 app( enc3 );
164 parse_state.first = first;
165 return;
166 }
167 if( cp >= 0x800U ) {
168 // 3 bytes
169 char const enc2 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
170 char const enc1 =
171 u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
172 char const enc0 = u32toC( ( cp >> 12U ) | 0b1110'0000U );
173 app( enc0 );
174 app( enc1 );
175 app( enc2 );
176 parse_state.first = first;
177 return;
178 }
179 // cp >= 0x80U
180 // 2 bytes
181 char const enc1 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
182 char const enc0 = u32toC( ( cp >> 6U ) | 0b1100'0000U );
183 app( enc0 );
184 app( enc1 );
185 parse_state.first = first;
186 }
187
// Character sets used by the string parser.
188 namespace parse_tokens {
// Backslash followed by double quote (plus the terminating NUL of the
// literal). Not referenced in the visible part of this file.
189 inline constexpr char const escape_quotes[] = "\\\"";
190 }
191
// Unescapes the JSON string held in parse_state into a freshly allocated
// string of type json_base_type<JsonMember> and returns it (or a value
// constructed from it when the string type is not convertible to
// json_result<JsonMember>).
//
// AllowHighEight: when false, a character following a backslash that is not
//                 one of the recognised escapes must be <= 0x7F.
// KnownBounds:    when true, the end-of-data assertions in the constexpr
//                 scanning loop are satisfied without comparing pointers.
192 // Fast path for parsing escaped strings to a std::string with the default
193 // appender
194 template<bool AllowHighEight, typename JsonMember, bool KnownBounds,
195 typename ParseState>
196 [[nodiscard, maybe_unused]] constexpr auto // json_result<JsonMember>
// NOTE(review): listing line 197 (function name and parameter list) is
// missing from this extraction. The index at the end of this page lists it
// as parse_string_known_stdstring(ParseState &parse_state).
198 using string_type = json_base_type<JsonMember>;
// Output buffer is pre-sized to the input length and NUL-filled; it is
// shrunk to the real length at the end. Presumably unescaping never makes
// the text longer - the assertion before resize checks this after the fact.
199 string_type result =
200 string_type( std::size( parse_state ), '\0',
201 parse_state.get_allocator_for( template_arg<char> ) );
202 char *it = std::data( result );
203
// Drop an opening double quote if present and remember that it was there.
204 bool const has_quote = parse_state.front( ) == '"';
205 if( has_quote ) {
206 parse_state.remove_prefix( );
207 }
208
// Bulk-copy a leading run whose length comes from parse_state.counter.
// NOTE(review): presumably counter records where the first backslash was
// seen by an earlier scan - confirm against the code that sets it.
209 if( auto const first_slash =
210 static_cast<std::ptrdiff_t>( parse_state.counter ) - 1;
211 first_slash > 1 ) {
212 it = std::copy_n( parse_state.first, first_slash, it );
213 parse_state.first += first_slash;
214 }
// Loop condition: keep going until a double quote is at the front; for
// checked input also stop when no data remains.
215 constexpr auto pred = []( auto const &r ) {
216 if constexpr( ParseState::is_unchecked_input ) {
217 return DAW_LIKELY( r.front( ) != '"' );
218 } else {
219 return DAW_LIKELY( r.has_more( ) ) and ( r.front( ) != '"' );
220 }
221 };
222
223 while( pred( parse_state ) ) {
// Step 1: find the next double quote or backslash and copy everything
// before it verbatim.
224 {
225 char const *first = parse_state.first;
226 char const *const last = parse_state.last;
// With the constexpr execution tag a plain character loop is used;
// otherwise the search is delegated to mem_move_to_next_of.
227 if constexpr( std::is_same<typename ParseState::exec_tag_t,
228 constexpr_exec_tag>::value ) {
229
230 daw_json_assert_weak( KnownBounds or first < last,
231 ErrorReason::UnexpectedEndOfData,
232 parse_state );
233 while( *first != '"' and *first != '\\' ) {
234 ++first;
235 daw_json_assert_weak( KnownBounds or first < last,
236 ErrorReason::UnexpectedEndOfData,
237 parse_state );
238 }
239 } else {
240 first =
241 mem_move_to_next_of<( ParseState::is_unchecked_input or
242 ParseState::is_zero_terminated_string ),
243 '"', '\\'>( ParseState::exec_tag, first,
244 last );
245 }
246 it = daw::algorithm::copy( parse_state.first, first, it );
247 parse_state.first = first;
248 }
// Step 2: if the scan stopped on a backslash, translate the escape.
249 if( parse_state.front( ) == '\\' ) {
250 parse_state.remove_prefix( );
251 daw_json_assert_weak( not parse_state.is_space_unchecked( ),
252 ErrorReason::InvalidUTFCodepoint,
253 parse_state );
254 switch( parse_state.front( ) ) {
255 case 'b':
256 *it++ = '\b';
257 parse_state.remove_prefix( );
258 break;
259 case 'f':
260 *it++ = '\f';
261 parse_state.remove_prefix( );
262 break;
263 case 'n':
264 *it++ = '\n';
265 parse_state.remove_prefix( );
266 break;
267 case 'r':
268 *it++ = '\r';
269 parse_state.remove_prefix( );
270 break;
271 case 't':
272 *it++ = '\t';
273 parse_state.remove_prefix( );
274 break;
275 case 'u':
// NOTE(review): listing line 276 is missing from this extraction;
// presumably it hands off to decode_utf16 with parse_state and it.
277 break;
// Solidus, backslash and double quote stand for themselves.
278 case '/':
279 case '\\':
280 case '"':
281 *it++ = parse_state.front( );
282 parse_state.remove_prefix( );
283 break;
// Any other escaped character is copied through unchanged.
284 default:
285 if constexpr( not AllowHighEight ) {
// NOTE(review): listing line 286 is missing from this extraction. The
// lines below are the arguments of a call whose opening is not visible.
287 ( not parse_state.is_space_unchecked( ) ) &
288 ( static_cast<unsigned char>( parse_state.front( ) ) <=
289 0x7FU ),
290 ErrorReason::InvalidStringHighASCII, parse_state );
291 }
292 *it++ = parse_state.front( );
293 parse_state.remove_prefix( );
294 }
295 } else {
// Not a backslash: when the string started with a quote, the front must
// pass is_quotes_checked.
296 daw_json_assert_weak( not has_quote or
297 parse_state.is_quotes_checked( ),
298 ErrorReason::InvalidString, parse_state );
299 }
// A quoted string must not run out of data before its closing quote.
300 daw_json_assert_weak( not has_quote or parse_state.has_more( ),
301 ErrorReason::UnexpectedEndOfData, parse_state );
302 }
// Trim the buffer to the number of bytes actually written.
303 auto const sz =
304 static_cast<std::size_t>( std::distance( std::data( result ), it ) );
305 daw_json_assert_weak( std::size( result ) >= sz,
306 ErrorReason::InvalidString, parse_state );
307 result.resize( sz );
308 if constexpr( std::is_convertible<string_type,
309 json_result<JsonMember>>::value ) {
310 return result;
311 } else {
312 using constructor_t = typename JsonMember::constructor_t;
// NOTE(review): listing line 313 is missing from this extraction;
// presumably it is `return construct_value(` taking the arguments below.
314 template_args<json_result<JsonMember>, constructor_t>, parse_state,
315 std::data( result ), daw::data_end( result ) );
316 }
317 }
318 } // namespace json_details
319 } // namespace DAW_JSON_VER
320} // namespace daw::json
#define daw_json_assert_weak(Bool,...)
Definition: daw_json_assert.h:190
#define daw_json_assert(Bool,...)
Definition: daw_json_assert.h:179
ParseState & parse_state
Definition: daw_json_parse_class.h:182
Iterator & it
Definition: daw_json_traits.h:251
constexpr char const escape_quotes[]
Definition: daw_json_parse_std_string.h:189
constexpr auto parse_string_known_stdstring(ParseState &parse_state)
Definition: daw_json_parse_std_string.h:197
constexpr UInt16 byte_from_nibbles(char const *&first)
Definition: daw_json_parse_std_string.h:38
DAW_ATTRIB_INLINE CharT * mem_move_to_next_of(runtime_exec_tag, CharT *first, CharT *last)
Definition: daw_not_const_ex_functions.h:353
constexpr UInt8 to_nibble(unsigned char chr)
Definition: daw_json_parse_std_string.h:26
static constexpr char * decode_utf16(ParseState &parse_state, char *it)
Definition: daw_json_parse_std_string.h:53
static constexpr DAW_ATTRIB_FLATINLINE auto construct_value(template_params< Value, Constructor >, ParseState &parse_state, Args &&...args)
Definition: daw_json_parse_common.h:60
typename JsonMember::parse_to_t json_result
Definition: daw_json_parse_common.h:200
constexpr char u32toC(UInt32 value)
Definition: daw_json_parse_std_string.h:47
typename JsonMember::base_type json_base_type
Definition: daw_json_parse_common.h:203
Definition: daw_from_json.h:22
Definition: daw_json_exec_modes.h:19
#define DAW_JSON_VER
The version string used in namespace definitions. Must be a valid namespace name.
Definition: version.h:16