Horizon
Loading...
Searching...
No Matches
parser.hpp
1#pragma once
2
3#include <cmath> // isfinite
4#include <cstdint> // uint8_t
5#include <functional> // function
6#include <string> // string
7#include <utility> // move
8#include <vector> // vector
9
10#include <nlohmann/detail/exceptions.hpp>
11#include <nlohmann/detail/input/input_adapters.hpp>
12#include <nlohmann/detail/input/json_sax.hpp>
13#include <nlohmann/detail/input/lexer.hpp>
14#include <nlohmann/detail/macro_scope.hpp>
15#include <nlohmann/detail/meta/is_sax.hpp>
16#include <nlohmann/detail/value_t.hpp>
17
18namespace nlohmann
19{
20namespace detail
21{
////////////
// parser //
////////////
25
/// Events reported to a parser callback while a document is being parsed.
/// Stored in a single byte (underlying type std::uint8_t).
enum class parse_event_t : std::uint8_t
{
    /// the parser read `{` and started to process a JSON object
    object_start,
    /// the parser read `}` and finished processing a JSON object
    object_end,
    /// the parser read `[` and started to process a JSON array
    array_start,
    /// the parser read `]` and finished processing a JSON array
    array_end,
    /// the parser read a key of a value in an object
    key,
    /// the parser finished reading a JSON value
    value
};
41
42template<typename BasicJsonType>
43using parser_callback_t =
44 std::function<bool(int /*depth*/, parse_event_t /*event*/, BasicJsonType& /*parsed*/)>;
45
51template<typename BasicJsonType, typename InputAdapterType>
52class parser
53{
54 using number_integer_t = typename BasicJsonType::number_integer_t;
55 using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
56 using number_float_t = typename BasicJsonType::number_float_t;
57 using string_t = typename BasicJsonType::string_t;
59 using token_type = typename lexer_t::token_type;
60
61 public:
63 explicit parser(InputAdapterType&& adapter,
64 const parser_callback_t<BasicJsonType> cb = nullptr,
65 const bool allow_exceptions_ = true,
66 const bool skip_comments = false)
67 : callback(cb)
68 , m_lexer(std::move(adapter), skip_comments)
69 , allow_exceptions(allow_exceptions_)
70 {
71 // read first token
72 get_token();
73 }
74
85 void parse(const bool strict, BasicJsonType& result)
86 {
87 if (callback)
88 {
89 json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions);
90 sax_parse_internal(&sdp);
91
92 // in strict mode, input must be completely read
93 if (strict && (get_token() != token_type::end_of_input))
94 {
95 sdp.parse_error(m_lexer.get_position(),
96 m_lexer.get_token_string(),
97 parse_error::create(101, m_lexer.get_position(),
98 exception_message(token_type::end_of_input, "value"), BasicJsonType()));
99 }
100
101 // in case of an error, return discarded value
102 if (sdp.is_errored())
103 {
104 result = value_t::discarded;
105 return;
106 }
107
108 // set top-level value to null if it was discarded by the callback
109 // function
110 if (result.is_discarded())
111 {
112 result = nullptr;
113 }
114 }
115 else
116 {
117 json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
118 sax_parse_internal(&sdp);
119
120 // in strict mode, input must be completely read
121 if (strict && (get_token() != token_type::end_of_input))
122 {
123 sdp.parse_error(m_lexer.get_position(),
124 m_lexer.get_token_string(),
125 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), BasicJsonType()));
126 }
127
128 // in case of an error, return discarded value
129 if (sdp.is_errored())
130 {
131 result = value_t::discarded;
132 return;
133 }
134 }
135
136 result.assert_invariant();
137 }
138
145 bool accept(const bool strict = true)
146 {
148 return sax_parse(&sax_acceptor, strict);
149 }
150
151 template<typename SAX>
152 JSON_HEDLEY_NON_NULL(2)
153 bool sax_parse(SAX* sax, const bool strict = true)
154 {
156 const bool result = sax_parse_internal(sax);
157
158 // strict mode: next byte must be EOF
159 if (result && strict && (get_token() != token_type::end_of_input))
160 {
161 return sax->parse_error(m_lexer.get_position(),
162 m_lexer.get_token_string(),
163 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), BasicJsonType()));
164 }
165
166 return result;
167 }
168
169 private:
170 template<typename SAX>
171 JSON_HEDLEY_NON_NULL(2)
172 bool sax_parse_internal(SAX* sax)
173 {
174 // stack to remember the hierarchy of structured values we are parsing
175 // true = array; false = object
176 std::vector<bool> states;
177 // value to avoid a goto (see comment where set to true)
178 bool skip_to_state_evaluation = false;
179
180 while (true)
181 {
182 if (!skip_to_state_evaluation)
183 {
184 // invariant: get_token() was called before each iteration
185 switch (last_token)
186 {
187 case token_type::begin_object:
188 {
189 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1))))
190 {
191 return false;
192 }
193
194 // closing } -> we are done
195 if (get_token() == token_type::end_object)
196 {
197 if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
198 {
199 return false;
200 }
201 break;
202 }
203
204 // parse key
205 if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string))
206 {
207 return sax->parse_error(m_lexer.get_position(),
208 m_lexer.get_token_string(),
209 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), BasicJsonType()));
210 }
211 if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
212 {
213 return false;
214 }
215
216 // parse separator (:)
217 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
218 {
219 return sax->parse_error(m_lexer.get_position(),
220 m_lexer.get_token_string(),
221 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), BasicJsonType()));
222 }
223
224 // remember we are now inside an object
225 states.push_back(false);
226
227 // parse values
228 get_token();
229 continue;
230 }
231
232 case token_type::begin_array:
233 {
234 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1))))
235 {
236 return false;
237 }
238
239 // closing ] -> we are done
240 if (get_token() == token_type::end_array)
241 {
242 if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
243 {
244 return false;
245 }
246 break;
247 }
248
249 // remember we are now inside an array
250 states.push_back(true);
251
252 // parse values (no need to call get_token)
253 continue;
254 }
255
256 case token_type::value_float:
257 {
258 const auto res = m_lexer.get_number_float();
259
260 if (JSON_HEDLEY_UNLIKELY(!std::isfinite(res)))
261 {
262 return sax->parse_error(m_lexer.get_position(),
263 m_lexer.get_token_string(),
264 out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'", BasicJsonType()));
265 }
266
267 if (JSON_HEDLEY_UNLIKELY(!sax->number_float(res, m_lexer.get_string())))
268 {
269 return false;
270 }
271
272 break;
273 }
274
275 case token_type::literal_false:
276 {
277 if (JSON_HEDLEY_UNLIKELY(!sax->boolean(false)))
278 {
279 return false;
280 }
281 break;
282 }
283
284 case token_type::literal_null:
285 {
286 if (JSON_HEDLEY_UNLIKELY(!sax->null()))
287 {
288 return false;
289 }
290 break;
291 }
292
293 case token_type::literal_true:
294 {
295 if (JSON_HEDLEY_UNLIKELY(!sax->boolean(true)))
296 {
297 return false;
298 }
299 break;
300 }
301
302 case token_type::value_integer:
303 {
304 if (JSON_HEDLEY_UNLIKELY(!sax->number_integer(m_lexer.get_number_integer())))
305 {
306 return false;
307 }
308 break;
309 }
310
311 case token_type::value_string:
312 {
313 if (JSON_HEDLEY_UNLIKELY(!sax->string(m_lexer.get_string())))
314 {
315 return false;
316 }
317 break;
318 }
319
320 case token_type::value_unsigned:
321 {
322 if (JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(m_lexer.get_number_unsigned())))
323 {
324 return false;
325 }
326 break;
327 }
328
329 case token_type::parse_error:
330 {
331 // using "uninitialized" to avoid "expected" message
332 return sax->parse_error(m_lexer.get_position(),
333 m_lexer.get_token_string(),
334 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized, "value"), BasicJsonType()));
335 }
336
337 case token_type::uninitialized:
338 case token_type::end_array:
339 case token_type::end_object:
340 case token_type::name_separator:
341 case token_type::value_separator:
342 case token_type::end_of_input:
343 case token_type::literal_or_value:
344 default: // the last token was unexpected
345 {
346 return sax->parse_error(m_lexer.get_position(),
347 m_lexer.get_token_string(),
348 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value, "value"), BasicJsonType()));
349 }
350 }
351 }
352 else
353 {
354 skip_to_state_evaluation = false;
355 }
356
357 // we reached this line after we successfully parsed a value
358 if (states.empty())
359 {
360 // empty stack: we reached the end of the hierarchy: done
361 return true;
362 }
363
364 if (states.back()) // array
365 {
366 // comma -> next value
367 if (get_token() == token_type::value_separator)
368 {
369 // parse a new value
370 get_token();
371 continue;
372 }
373
374 // closing ]
375 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array))
376 {
377 if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
378 {
379 return false;
380 }
381
382 // We are done with this array. Before we can parse a
383 // new value, we need to evaluate the new state first.
384 // By setting skip_to_state_evaluation to false, we
385 // are effectively jumping to the beginning of this if.
386 JSON_ASSERT(!states.empty());
387 states.pop_back();
388 skip_to_state_evaluation = true;
389 continue;
390 }
391
392 return sax->parse_error(m_lexer.get_position(),
393 m_lexer.get_token_string(),
394 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array, "array"), BasicJsonType()));
395 }
396
397 // states.back() is false -> object
398
399 // comma -> next value
400 if (get_token() == token_type::value_separator)
401 {
402 // parse key
403 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string))
404 {
405 return sax->parse_error(m_lexer.get_position(),
406 m_lexer.get_token_string(),
407 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), BasicJsonType()));
408 }
409
410 if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
411 {
412 return false;
413 }
414
415 // parse separator (:)
416 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
417 {
418 return sax->parse_error(m_lexer.get_position(),
419 m_lexer.get_token_string(),
420 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), BasicJsonType()));
421 }
422
423 // parse values
424 get_token();
425 continue;
426 }
427
428 // closing }
429 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object))
430 {
431 if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
432 {
433 return false;
434 }
435
436 // We are done with this object. Before we can parse a
437 // new value, we need to evaluate the new state first.
438 // By setting skip_to_state_evaluation to false, we
439 // are effectively jumping to the beginning of this if.
440 JSON_ASSERT(!states.empty());
441 states.pop_back();
442 skip_to_state_evaluation = true;
443 continue;
444 }
445
446 return sax->parse_error(m_lexer.get_position(),
447 m_lexer.get_token_string(),
448 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object, "object"), BasicJsonType()));
449 }
450 }
451
453 token_type get_token()
454 {
455 return last_token = m_lexer.scan();
456 }
457
458 std::string exception_message(const token_type expected, const std::string& context)
459 {
460 std::string error_msg = "syntax error ";
461
462 if (!context.empty())
463 {
464 error_msg += "while parsing " + context + " ";
465 }
466
467 error_msg += "- ";
468
469 if (last_token == token_type::parse_error)
470 {
471 error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" +
472 m_lexer.get_token_string() + "'";
473 }
474 else
475 {
476 error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token));
477 }
478
479 if (expected != token_type::uninitialized)
480 {
481 error_msg += "; expected " + std::string(lexer_t::token_type_name(expected));
482 }
483
484 return error_msg;
485 }
486
487 private:
489 const parser_callback_t<BasicJsonType> callback = nullptr;
491 token_type last_token = token_type::uninitialized;
493 lexer_t m_lexer;
495 const bool allow_exceptions = true;
496};
497
498} // namespace detail
499} // namespace nlohmann
Definition json_sax.hpp:636
SAX implementation to create a JSON value from SAX events.
Definition json_sax.hpp:155
JSON_HEDLEY_RETURNS_NON_NULL static JSON_HEDLEY_CONST const char * token_type_name(const token_type t) noexcept
return name of values of type token_type (only used for errors)
Definition lexer.hpp:54
lexical analysis
Definition lexer.hpp:104
string_t & get_string()
return current string value (implicitly resets the token; useful only once)
Definition lexer.hpp:1422
constexpr position_t get_position() const noexcept
return position of last read token
Definition lexer.hpp:1432
constexpr number_integer_t get_number_integer() const noexcept
return integer value
Definition lexer.hpp:1404
constexpr number_unsigned_t get_number_unsigned() const noexcept
return unsigned integer value
Definition lexer.hpp:1410
constexpr number_float_t get_number_float() const noexcept
return floating-point value
Definition lexer.hpp:1416
std::string get_token_string() const
return the last read token (for errors only).
Definition lexer.hpp:1440
JSON_HEDLEY_RETURNS_NON_NULL constexpr const char * get_error_message() const noexcept
return syntax error message
Definition lexer.hpp:1465
static parse_error create(int id_, const position_t &pos, const std::string &what_arg, const BasicJsonType &context)
create a parse error exception
Definition exceptions.hpp:197
syntax analysis
Definition parser.hpp:53
parser(InputAdapterType &&adapter, const parser_callback_t< BasicJsonType > cb=nullptr, const bool allow_exceptions_=true, const bool skip_comments=false)
a parser reading from an input adapter
Definition parser.hpp:63
bool accept(const bool strict=true)
public accept interface
Definition parser.hpp:145
void parse(const bool strict, BasicJsonType &result)
public parser interface
Definition parser.hpp:85
@ discarded
discarded by the parser callback function
parse_event_t
Definition parser.hpp:27
@ value
the parser finished reading a JSON value
@ key
the parser read a key of a value in an object
@ array_end
the parser read ] and finished processing a JSON array
@ array_start
the parser read [ and started to process a JSON array
@ object_start
the parser read { and started to process a JSON object
@ object_end
the parser read } and finished processing a JSON object
@ strict
throw a type_error exception in case of invalid UTF-8
namespace for Niels Lohmann
Definition adl_serializer.hpp:12