From 5beab80553cffd61f546f229de24718f8147d006 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 6 Mar 2018 18:17:07 +0100 Subject: [PATCH] :hammer: using the SAX-DOM parser --- benchmarks/src/benchmarks.cpp | 56 --------- include/nlohmann/detail/input/json_sax.hpp | 43 +++++-- include/nlohmann/detail/input/parser.hpp | 90 +++++++------ include/nlohmann/json.hpp | 6 +- single_include/nlohmann/json.hpp | 139 +++++++++++++-------- test/src/unit-class_parser.cpp | 5 +- 6 files changed, 173 insertions(+), 166 deletions(-) diff --git a/benchmarks/src/benchmarks.cpp b/benchmarks/src/benchmarks.cpp index 46135e44..bebef603 100644 --- a/benchmarks/src/benchmarks.cpp +++ b/benchmarks/src/benchmarks.cpp @@ -37,35 +37,6 @@ BENCHMARK_CAPTURE(ParseFile, signed_ints, "data/numbers/signed_ints.json"); BENCHMARK_CAPTURE(ParseFile, unsigned_ints, "data/numbers/unsigned_ints.json"); -static void ParseFileSax(benchmark::State& state, const char* filename) -{ - while (state.KeepRunning()) - { - state.PauseTiming(); - auto* f = new std::ifstream(filename); - auto* sdp = new nlohmann::json_sax_dom_parser(); - state.ResumeTiming(); - - json::sax_parse(*f, sdp); - - state.PauseTiming(); - delete f; - delete sdp; - state.ResumeTiming(); - } - - std::ifstream file(filename, std::ios::binary | std::ios::ate); - state.SetBytesProcessed(state.iterations() * file.tellg()); -} -BENCHMARK_CAPTURE(ParseFileSax, jeopardy, "data/jeopardy/jeopardy.json"); -BENCHMARK_CAPTURE(ParseFileSax, canada, "data/nativejson-benchmark/canada.json"); -BENCHMARK_CAPTURE(ParseFileSax, citm_catalog, "data/nativejson-benchmark/citm_catalog.json"); -BENCHMARK_CAPTURE(ParseFileSax, twitter, "data/nativejson-benchmark/twitter.json"); -BENCHMARK_CAPTURE(ParseFileSax, floats, "data/numbers/floats.json"); -BENCHMARK_CAPTURE(ParseFileSax, signed_ints, "data/numbers/signed_ints.json"); -BENCHMARK_CAPTURE(ParseFileSax, unsigned_ints, "data/numbers/unsigned_ints.json"); - - ////////////////////////////////////////////////////////////////////////////// // parse JSON from string ////////////////////////////////////////////////////////////////////////////// @@ -98,33 +69,6 @@ BENCHMARK_CAPTURE(ParseString, floats, "data/numbers/floats.json"); BENCHMARK_CAPTURE(ParseString, signed_ints, "data/numbers/signed_ints.json"); BENCHMARK_CAPTURE(ParseString, unsigned_ints, "data/numbers/unsigned_ints.json"); -static void ParseStringSax(benchmark::State& state, const char* filename) -{ - std::ifstream f(filename); - std::string str((std::istreambuf_iterator(f)), std::istreambuf_iterator()); - - while (state.KeepRunning()) - { - state.PauseTiming(); - auto* sdp = new nlohmann::json_sax_dom_parser(); - state.ResumeTiming(); - - json::sax_parse(str, sdp); - - state.PauseTiming(); - delete sdp; - state.ResumeTiming(); - } - - state.SetBytesProcessed(state.iterations() * str.size()); -} -BENCHMARK_CAPTURE(ParseStringSax, jeopardy, "data/jeopardy/jeopardy.json"); -BENCHMARK_CAPTURE(ParseStringSax, canada, "data/nativejson-benchmark/canada.json"); -BENCHMARK_CAPTURE(ParseStringSax, citm_catalog, "data/nativejson-benchmark/citm_catalog.json"); -BENCHMARK_CAPTURE(ParseStringSax, twitter, "data/nativejson-benchmark/twitter.json"); -BENCHMARK_CAPTURE(ParseStringSax, floats, "data/numbers/floats.json"); -BENCHMARK_CAPTURE(ParseStringSax, signed_ints, "data/numbers/signed_ints.json"); -BENCHMARK_CAPTURE(ParseStringSax, unsigned_ints, "data/numbers/unsigned_ints.json"); ////////////////////////////////////////////////////////////////////////////// // serialize JSON diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index d0161cd4..06227e70 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -132,6 +132,10 @@ class json_sax_dom_parser : public json_sax using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_float_t = typename BasicJsonType::number_float_t; + json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) + : root(r), allow_exceptions(allow_exceptions_) + {} + bool null() override { handle_value(nullptr); @@ -204,25 +208,30 @@ class json_sax_dom_parser : public json_sax return true; } - bool parse_error(std::size_t position, const std::string&, const std::string& error_msg) override + bool parse_error(std::size_t position, const std::string& token, + const std::string& error_msg) override { - JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg)); + errored = true; + if (allow_exceptions) + { + if (error_msg == "number overflow") + { + JSON_THROW(BasicJsonType::out_of_range::create(406, "number overflow parsing '" + token + "'")); + } + else + { + JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg)); + } + } return false; } - BasicJsonType& get_value() + bool is_errored() const { - return root; + return errored; } private: - /// the parsed JSON value - BasicJsonType root; - /// stack to model hierarchy of values - std::vector ref_stack; - /// helper to hold the reference for the next object element - BasicJsonType* object_element = nullptr; - /*! @invariant If the ref stack is empty, then the passed value will be the new root. @@ -234,7 +243,6 @@ class json_sax_dom_parser : public json_sax { if (ref_stack.empty()) { - assert(root.is_null()); root = BasicJsonType(std::forward(v)); return &root; } @@ -254,6 +262,17 @@ class json_sax_dom_parser : public json_sax } } } + + /// the parsed JSON value + BasicJsonType& root; + /// stack to model hierarchy of values + std::vector ref_stack; + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; + /// whether a syntax error occurred + bool errored = false; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; }; } diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index ece475bb..23fac952 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -63,11 +63,10 @@ class parser const parser_callback_t cb = nullptr, const bool allow_exceptions_ = true) : callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_) - {} - - parser(detail::input_adapter_t adapter, json_sax_t* s) - : m_lexer(adapter), sax(s) - {} + { + // read first token + get_token(); + } /*! @brief public parser interface @@ -81,31 +80,52 @@ class parser */ void parse(const bool strict, BasicJsonType& result) { - // read first token - get_token(); - - parse_internal(true, result); - result.assert_invariant(); - - // in strict mode, input must be completely read - if (strict) + if (callback) { - get_token(); - expect(token_type::end_of_input); + parse_internal(true, result); + result.assert_invariant(); + + // in strict mode, input must be completely read + if (strict) + { + get_token(); + expect(token_type::end_of_input); + } + + // in case of an error, return discarded value + if (errored) + { + result = value_t::discarded; + return; + } + + // set top-level value to null if it was discarded by the callback + // function + if (result.is_discarded()) + { + result = nullptr; + } } - - // in case of an error, return discarded value - if (errored) + else { - result = value_t::discarded; - return; - } + json_sax_dom_parser sdp(result, allow_exceptions); + sax_parse_internal(&sdp); + result.assert_invariant(); - // set top-level value to null if it was discarded by the callback - // function - if (result.is_discarded()) - { - result = nullptr; + // in strict mode, input must be completely read + if (strict and (get_token() != token_type::end_of_input)) + { + sdp.parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + exception_message(token_type::end_of_input)); + } + + // in case of an error, return discarded value + if (sdp.is_errored()) + { + result = value_t::discarded; + return; + } } } @@ -117,9 +137,6 @@ class parser */ bool accept(const bool strict = true) { - // read first token - get_token(); - if (not accept_internal()) { return false; @@ -129,12 +146,9 @@ class parser return not strict or (get_token() == token_type::end_of_input); } - bool sax_parse() + bool sax_parse(json_sax_t* sax) { - // read first token - get_token(); - - return sax_parse_internal(); + return sax_parse_internal(sax); } private: @@ -535,7 +549,7 @@ class parser } } - bool sax_parse_internal() + bool sax_parse_internal(json_sax_t* sax) { switch (last_token) { @@ -584,7 +598,7 @@ class parser // parse value get_token(); - if (not sax_parse_internal()) + if (not sax_parse_internal(sax)) { return false; } @@ -631,7 +645,7 @@ class parser while (true) { // parse value - if (not sax_parse_internal()) + if (not sax_parse_internal(sax)) { return false; } @@ -782,8 +796,6 @@ class parser bool errored = false; /// whether to throw exceptions in case of errors const bool allow_exceptions = true; - /// associated SAX parse event receiver - json_sax_t* sax = nullptr; }; } } diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index 7eb0a352..cef504ff 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -5930,12 +5930,12 @@ class basic_json static bool sax_parse(detail::input_adapter i, json_sax_t* sax) { - return parser(i, sax).sax_parse(); + return parser(i).sax_parse(sax); } static bool sax_parse(detail::input_adapter& i, json_sax_t* sax) { - return parser(i, sax).sax_parse(); + return parser(i).sax_parse(sax); } /*! @@ -6013,7 +6013,7 @@ class basic_json typename std::iterator_traits::iterator_category>::value, int>::type = 0> static bool sax_parse(IteratorType first, IteratorType last, json_sax_t* sax) { - return parser(detail::input_adapter(first, last), sax).sax_parse(); + return parser(detail::input_adapter(first, last)).sax_parse(sax); } /*! diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 3b1f7e43..8aaa1f68 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3266,6 +3266,10 @@ class json_sax_dom_parser : public json_sax using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_float_t = typename BasicJsonType::number_float_t; + json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) + : root(r), allow_exceptions(allow_exceptions_) + {} + bool null() override { handle_value(nullptr); @@ -3338,25 +3342,30 @@ class json_sax_dom_parser : public json_sax return true; } - bool parse_error(std::size_t position, const std::string&, const std::string& error_msg) override + bool parse_error(std::size_t position, const std::string& token, + const std::string& error_msg) override { - JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg)); + errored = true; + if (allow_exceptions) + { + if (error_msg == "number overflow") + { + JSON_THROW(BasicJsonType::out_of_range::create(406, "number overflow parsing '" + token + "'")); + } + else + { + JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg)); + } + } return false; } - BasicJsonType& get_value() + bool is_errored() const { - return root; + return errored; } private: - /// the parsed JSON value - BasicJsonType root; - /// stack to model hierarchy of values - std::vector ref_stack; - /// helper to hold the reference for the next object element - BasicJsonType* object_element = nullptr; - /*! @invariant If the ref stack is empty, then the passed value will be the new root. @@ -3368,7 +3377,6 @@ class json_sax_dom_parser : public json_sax { if (ref_stack.empty()) { - assert(root.is_null()); root = BasicJsonType(std::forward(v)); return &root; } @@ -3388,6 +3396,17 @@ class json_sax_dom_parser : public json_sax } } } + + /// the parsed JSON value + BasicJsonType& root; + /// stack to model hierarchy of values + std::vector ref_stack; + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; + /// whether a syntax error occurred + bool errored = false; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; }; } @@ -3447,11 +3466,10 @@ class parser const parser_callback_t cb = nullptr, const bool allow_exceptions_ = true) : callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_) - {} - - parser(detail::input_adapter_t adapter, json_sax_t* s) - : m_lexer(adapter), sax(s) - {} + { + // read first token + get_token(); + } /*! @brief public parser interface @@ -3465,31 +3483,52 @@ class parser */ void parse(const bool strict, BasicJsonType& result) { - // read first token - get_token(); - - parse_internal(true, result); - result.assert_invariant(); - - // in strict mode, input must be completely read - if (strict) + if (callback) { - get_token(); - expect(token_type::end_of_input); + parse_internal(true, result); + result.assert_invariant(); + + // in strict mode, input must be completely read + if (strict) + { + get_token(); + expect(token_type::end_of_input); + } + + // in case of an error, return discarded value + if (errored) + { + result = value_t::discarded; + return; + } + + // set top-level value to null if it was discarded by the callback + // function + if (result.is_discarded()) + { + result = nullptr; + } } - - // in case of an error, return discarded value - if (errored) + else { - result = value_t::discarded; - return; - } + json_sax_dom_parser sdp(result, allow_exceptions); + sax_parse_internal(&sdp); + result.assert_invariant(); - // set top-level value to null if it was discarded by the callback - // function - if (result.is_discarded()) - { - result = nullptr; + // in strict mode, input must be completely read + if (strict and (get_token() != token_type::end_of_input)) + { + sdp.parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + exception_message(token_type::end_of_input)); + } + + // in case of an error, return discarded value + if (sdp.is_errored()) + { + result = value_t::discarded; + return; + } } } @@ -3501,9 +3540,6 @@ class parser */ bool accept(const bool strict = true) { - // read first token - get_token(); - if (not accept_internal()) { return false; @@ -3513,12 +3549,9 @@ class parser return not strict or (get_token() == token_type::end_of_input); } - bool sax_parse() + bool sax_parse(json_sax_t* sax) { - // read first token - get_token(); - - return sax_parse_internal(); + return sax_parse_internal(sax); } private: @@ -3919,7 +3952,7 @@ class parser } } - bool sax_parse_internal() + bool sax_parse_internal(json_sax_t* sax) { switch (last_token) { @@ -3968,7 +4001,7 @@ class parser // parse value get_token(); - if (not sax_parse_internal()) + if (not sax_parse_internal(sax)) { return false; } @@ -4015,7 +4048,7 @@ class parser while (true) { // parse value - if (not sax_parse_internal()) + if (not sax_parse_internal(sax)) { return false; } @@ -4166,8 +4199,6 @@ class parser bool errored = false; /// whether to throw exceptions in case of errors const bool allow_exceptions = true; - /// associated SAX parse event receiver - json_sax_t* sax = nullptr; }; } } @@ -15989,12 +16020,12 @@ class basic_json static bool sax_parse(detail::input_adapter i, json_sax_t* sax) { - return parser(i, sax).sax_parse(); + return parser(i).sax_parse(sax); } static bool sax_parse(detail::input_adapter& i, json_sax_t* sax) { - return parser(i, sax).sax_parse(); + return parser(i).sax_parse(sax); } /*! @@ -16072,7 +16103,7 @@ class basic_json typename std::iterator_traits::iterator_category>::value, int>::type = 0> static bool sax_parse(IteratorType first, IteratorType last, json_sax_t* sax) { - return parser(detail::input_adapter(first, last), sax).sax_parse(); + return parser(detail::input_adapter(first, last)).sax_parse(sax); } /*! diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index a15ad167..39753d68 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -148,9 +148,10 @@ json parser_helper(const std::string& s) CHECK_NOTHROW(json::parser(nlohmann::detail::input_adapter(s), nullptr, false).parse(true, j_nothrow)); CHECK(j_nothrow == j); - nlohmann::json_sax_dom_parser sdp; + json j_sax; + nlohmann::json_sax_dom_parser sdp(j_sax); json::sax_parse(s, &sdp); - CHECK(sdp.get_value() == j); + CHECK(j_sax == j); return j; }