From 374ebacc51cea921414e64806caab254569b23f2 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sat, 24 Feb 2018 18:04:07 +0100 Subject: [PATCH] :sparkles: added a SAX parser #971 --- include/nlohmann/detail/input/parser.hpp | 223 ++++++++++++++ include/nlohmann/json.hpp | 21 ++ single_include/nlohmann/json.hpp | 244 +++++++++++++++ test/src/unit-deserialization.cpp | 369 ++++++++++++++++++++++- 4 files changed, 854 insertions(+), 3 deletions(-) diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index 63e8541f..009ea994 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -52,6 +52,53 @@ class parser value }; + struct SAX + { + /// a null value was read + virtual bool null() = 0; + + /// a boolean value was read + virtual bool boolean(bool) = 0; + + /// an integer number was read + virtual bool number_integer(number_integer_t) = 0; + + /// an unsigned integer number was read + virtual bool number_unsigned(number_unsigned_t) = 0; + + /// a floating-point number was read + /// the string parameter contains the raw number value + virtual bool number_float(number_float_t, const std::string&) = 0; + + /// a string value was read + virtual bool string(const std::string&) = 0; + + /// the beginning of an object was read + /// binary formats may report the number of elements + virtual bool start_object(std::size_t elements) = 0; + + /// an object key was read + virtual bool key(const std::string&) = 0; + + /// the end of an object was read + virtual bool end_object() = 0; + + /// the beginning of an array was read + /// binary formats may report the number of elements + virtual bool start_array(std::size_t elements) = 0; + + /// the end of an array was read + virtual bool end_array() = 0; + + /// a binary value was read + /// examples are CBOR type 2 strings, MessagePack bin, and maybe UBJSON array + virtual bool binary(const std::vector& vec) = 0; + + /// a parse error occurred + /// the byte position and the last token are reported + virtual bool parse_error(int position, const std::string& last_token) = 0; + }; + using parser_callback_t = std::function; @@ -62,6 +109,10 @@ class parser : callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_) {} + parser(detail::input_adapter_t adapter, SAX* s) + : m_lexer(adapter), sax(s) + {} + /*! @brief public parser interface @@ -122,6 +173,14 @@ class parser return not strict or (get_token() == token_type::end_of_input); } + bool sax_parse() + { + // read first token + get_token(); + + return sax_parse_internal(); + } + private: /*! @brief the actual parser @@ -520,6 +579,168 @@ class parser } } + bool sax_parse_internal() + { + switch (last_token) + { + case token_type::begin_object: + { + if (not sax->start_object(-1)) + { + return false; + } + + // read next token + get_token(); + + // closing } -> we are done + if (last_token == token_type::end_object) + { + return sax->end_object(); + } + + // parse values + while (true) + { + // parse key + if (last_token != token_type::value_string) + { + if (not sax->key(m_lexer.move_string())) + { + return false; + } + } + + // parse separator (:) + get_token(); + if (last_token != token_type::name_separator) + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + + // parse value + get_token(); + if (not sax_parse_internal()) + { + return false; + } + + // comma -> next value + get_token(); + if (last_token == token_type::value_separator) + { + get_token(); + continue; + } + + // closing } + if (last_token == token_type::end_object) + { + return sax->end_object(); + } + else + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + } + } + + case token_type::begin_array: + { + if (not sax->start_array(-1)) + { + return false; + } + + // read next token + get_token(); + + // closing ] -> we are done + if (last_token == token_type::end_array) + { + return sax->end_array(); + } + + // parse values + while (true) + { + // parse value + if (not sax_parse_internal()) + { + return false; + } + + // comma -> next value + get_token(); + if (last_token == token_type::value_separator) + { + get_token(); + continue; + } + + // closing ] + if (last_token == token_type::end_array) + { + return sax->end_array(); + } + else + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + } + } + + case token_type::value_float: + { + const auto res = m_lexer.get_number_float(); + + if (JSON_UNLIKELY(not std::isfinite(res))) + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + else + { + return sax->number_float(res, m_lexer.move_string()); + } + } + + case token_type::literal_false: + { + return sax->boolean(false); + } + + case token_type::literal_null: + { + return sax->null(); + } + + case token_type::literal_true: + { + return sax->boolean(true); + } + + case token_type::value_integer: + { + return sax->number_integer(m_lexer.get_number_integer()); + } + + case token_type::value_string: + { + return sax->string(m_lexer.move_string()); + } + + case token_type::value_unsigned: + { + return sax->number_unsigned(m_lexer.get_number_unsigned()); + } + + default: // the last token was unexpected + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + } + } + /// get next token from lexer token_type get_token() { @@ -584,6 +805,8 @@ class parser token_type expected = token_type::uninitialized; /// whether to throw exceptions in case of errors const bool allow_exceptions = true; + /// associated SAX parse event receiver + SAX* sax = nullptr; }; } } diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index f92729f3..e43d37e7 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -1054,6 +1054,8 @@ class basic_json */ using parse_event_t = typename parser::parse_event_t; + using SAX = typename parser::SAX; + /*! @brief per-element parser callback type @@ -5925,6 +5927,16 @@ class basic_json return parser(i).accept(true); } + static bool sax_parse(detail::input_adapter i, SAX* sax) + { + return parser(i, sax).sax_parse(); + } + + static bool sax_parse(detail::input_adapter& i, SAX* sax) + { + return parser(i, sax).sax_parse(); + } + /*! @brief deserialize from an iterator range with contiguous storage @@ -5994,6 +6006,15 @@ class basic_json return parser(detail::input_adapter(first, last)).accept(true); } + template::iterator_category>::value, int>::type = 0> + static bool sax_parse(IteratorType first, IteratorType last, SAX* sax) + { + return parser(detail::input_adapter(first, last), sax).sax_parse(); + } + /*! @brief deserialize from stream @deprecated This stream operator is deprecated and will be removed in diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 3dcb834b..53b03421 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3175,6 +3175,53 @@ class parser value }; + struct SAX + { + /// a null value was read + virtual bool null() = 0; + + /// a boolean value was read + virtual bool boolean(bool) = 0; + + /// an integer number was read + virtual bool number_integer(number_integer_t) = 0; + + /// an unsigned integer number was read + virtual bool number_unsigned(number_unsigned_t) = 0; + + /// a floating-point number was read + /// the string parameter contains the raw number value + virtual bool number_float(number_float_t, const std::string&) = 0; + + /// a string value was read + virtual bool string(const std::string&) = 0; + + /// the beginning of an object was read + /// binary formats may report the number of elements + virtual bool start_object(std::size_t elements) = 0; + + /// an object key was read + virtual bool key(const std::string&) = 0; + + /// the end of an object was read + virtual bool end_object() = 0; + + /// the beginning of an array was read + /// binary formats may report the number of elements + virtual bool start_array(std::size_t elements) = 0; + + /// the end of an array was read + virtual bool end_array() = 0; + + /// a binary value was read + /// examples are CBOR type 2 strings, MessagePack bin, and maybe UBJSON array + virtual bool binary(const std::vector& vec) = 0; + + /// a parse error occurred + /// the byte position and the last token are reported + virtual bool parse_error(int position, const std::string& last_token) = 0; + }; + using parser_callback_t = std::function; @@ -3185,6 +3232,10 @@ class parser : callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_) {} + parser(detail::input_adapter_t adapter, SAX* s) + : m_lexer(adapter), sax(s) + {} + /*! @brief public parser interface @@ -3245,6 +3296,14 @@ class parser return not strict or (get_token() == token_type::end_of_input); } + bool sax_parse() + { + // read first token + get_token(); + + return sax_parse_internal(); + } + private: /*! @brief the actual parser @@ -3643,6 +3702,168 @@ class parser } } + bool sax_parse_internal() + { + switch (last_token) + { + case token_type::begin_object: + { + if (not sax->start_object(-1)) + { + return false; + } + + // read next token + get_token(); + + // closing } -> we are done + if (last_token == token_type::end_object) + { + return sax->end_object(); + } + + // parse values + while (true) + { + // parse key + if (last_token != token_type::value_string) + { + if (not sax->key(m_lexer.move_string())) + { + return false; + } + } + + // parse separator (:) + get_token(); + if (last_token != token_type::name_separator) + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + + // parse value + get_token(); + if (not sax_parse_internal()) + { + return false; + } + + // comma -> next value + get_token(); + if (last_token == token_type::value_separator) + { + get_token(); + continue; + } + + // closing } + if (last_token == token_type::end_object) + { + return sax->end_object(); + } + else + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + } + } + + case token_type::begin_array: + { + if (not sax->start_array(-1)) + { + return false; + } + + // read next token + get_token(); + + // closing ] -> we are done + if (last_token == token_type::end_array) + { + return sax->end_array(); + } + + // parse values + while (true) + { + // parse value + if (not sax_parse_internal()) + { + return false; + } + + // comma -> next value + get_token(); + if (last_token == token_type::value_separator) + { + get_token(); + continue; + } + + // closing ] + if (last_token == token_type::end_array) + { + return sax->end_array(); + } + else + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + } + } + + case token_type::value_float: + { + const auto res = m_lexer.get_number_float(); + + if (JSON_UNLIKELY(not std::isfinite(res))) + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + else + { + return sax->number_float(res, m_lexer.move_string()); + } + } + + case token_type::literal_false: + { + return sax->boolean(false); + } + + case token_type::literal_null: + { + return sax->null(); + } + + case token_type::literal_true: + { + return sax->boolean(true); + } + + case token_type::value_integer: + { + return sax->number_integer(m_lexer.get_number_integer()); + } + + case token_type::value_string: + { + return sax->string(m_lexer.move_string()); + } + + case token_type::value_unsigned: + { + return sax->number_unsigned(m_lexer.get_number_unsigned()); + } + + default: // the last token was unexpected + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + } + } + /// get next token from lexer token_type get_token() { @@ -3707,6 +3928,8 @@ class parser token_type expected = token_type::uninitialized; /// whether to throw exceptions in case of errors const bool allow_exceptions = true; + /// associated SAX parse event receiver + SAX* sax = nullptr; }; } } @@ -10652,6 +10875,8 @@ class basic_json */ using parse_event_t = typename parser::parse_event_t; + using SAX = typename parser::SAX; + /*! @brief per-element parser callback type @@ -15523,6 +15748,16 @@ class basic_json return parser(i).accept(true); } + static bool sax_parse(detail::input_adapter i, SAX* sax) + { + return parser(i, sax).sax_parse(); + } + + static bool sax_parse(detail::input_adapter& i, SAX* sax) + { + return parser(i, sax).sax_parse(); + } + /*! @brief deserialize from an iterator range with contiguous storage @@ -15592,6 +15827,15 @@ class basic_json return parser(detail::input_adapter(first, last)).accept(true); } + template::iterator_category>::value, int>::type = 0> + static bool sax_parse(IteratorType first, IteratorType last, SAX* sax) + { + return parser(detail::input_adapter(first, last), sax).sax_parse(); + } + /*! @brief deserialize from stream @deprecated This stream operator is deprecated and will be removed in diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index 6e46abe3..fd42af2f 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -34,18 +34,114 @@ using nlohmann::json; #include #include +class SaxEventLogger : public nlohmann::json::SAX +{ + public: + bool null() override + { + events.push_back("null()"); + return true; + } + + bool boolean(bool val) override + { + events.push_back(val ? "boolean(true)" : "boolean(false)"); + return true; + } + + bool number_integer(json::number_integer_t val) override + { + events.push_back("number_integer(" + std::to_string(val) + ")"); + return true; + } + + bool number_unsigned(json::number_unsigned_t val) override + { + events.push_back("number_unsigned(" + std::to_string(val) + ")"); + return true; + } + + bool number_float(json::number_float_t val, const std::string& s) override + { + events.push_back("number_float(" + s + ")"); + return true; + } + + bool string(const std::string& val) override + { + events.push_back("string(" + val + ")"); + return true; + } + + bool start_object(std::size_t elements) override + { + events.push_back("start_object(" + std::to_string(elements) + ")"); + return true; + } + + bool key(const std::string& val) override + { + events.push_back("key(" + val + ")"); + return true; + } + + bool end_object()override + { + events.push_back("end_object()"); + return true; + } + + bool start_array(std::size_t elements) override + { + events.push_back("start_array(" + std::to_string(elements) + ")"); + return true; + } + + bool end_array() override + { + events.push_back("end_array()"); + return true; + } + + bool binary(const std::vector& vec) override + { + events.push_back("binary()"); + return true; + } + + bool parse_error(int position, const std::string& last_token) override + { + events.push_back("parse_error(" + std::to_string(position) + ")"); + return false; + } + + std::vector events; +}; + TEST_CASE("deserialization") { SECTION("successful deserialization") { SECTION("stream") { - std::stringstream ss1, ss2; + std::stringstream ss1, ss2, ss3; ss1 << "[\"foo\",1,2,3,false,{\"one\":1}]"; ss2 << "[\"foo\",1,2,3,false,{\"one\":1}]"; + ss3 << "[\"foo\",1,2,3,false,{\"one\":1}]"; json j = json::parse(ss1); CHECK(json::accept(ss2)); CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}})); + + SaxEventLogger l; + CHECK(json::sax_parse(ss3, &l)); + CHECK(l.events.size() == 10); + CHECK(l.events == std::vector( + { + "start_array(18446744073709551615)", "string(foo)", + "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", + "boolean(false)", "start_object(18446744073709551615)", + "number_unsigned(1)", "end_object()", "end_array()" + })); } SECTION("string literal") @@ -54,6 +150,17 @@ TEST_CASE("deserialization") json j = json::parse(s); CHECK(json::accept(s)); CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}})); + + SaxEventLogger l; + CHECK(json::sax_parse(s, &l)); + CHECK(l.events.size() == 10); + CHECK(l.events == std::vector( + { + "start_array(18446744073709551615)", "string(foo)", + "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", + "boolean(false)", "start_object(18446744073709551615)", + "number_unsigned(1)", "end_object()", "end_array()" + })); } SECTION("string_t") @@ -62,6 +169,17 @@ TEST_CASE("deserialization") json j = json::parse(s); CHECK(json::accept(s)); CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}})); + + SaxEventLogger l; + CHECK(json::sax_parse(s, &l)); + CHECK(l.events.size() == 10); + CHECK(l.events == std::vector( + { + "start_array(18446744073709551615)", "string(foo)", + "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", + "boolean(false)", "start_object(18446744073709551615)", + "number_unsigned(1)", "end_object()", "end_array()" + })); } SECTION("operator<<") @@ -92,19 +210,31 @@ TEST_CASE("deserialization") { SECTION("stream") { - std::stringstream ss1, ss2, ss3, ss4; + std::stringstream ss1, ss2, ss3, ss4, ss5; ss1 << "[\"foo\",1,2,3,false,{\"one\":1}"; ss2 << "[\"foo\",1,2,3,false,{\"one\":1}"; ss3 << "[\"foo\",1,2,3,false,{\"one\":1}"; ss4 << "[\"foo\",1,2,3,false,{\"one\":1}"; + ss5 << "[\"foo\",1,2,3,false,{\"one\":1}"; CHECK_THROWS_AS(json::parse(ss1), json::parse_error&); CHECK_THROWS_WITH(json::parse(ss2), "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'"); CHECK(not json::accept(ss3)); json j_error; - CHECK_NOTHROW(j_error = json::parse(ss1, nullptr, false)); + CHECK_NOTHROW(j_error = json::parse(ss4, nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(ss5, &l)); + CHECK(l.events.size() == 10); + CHECK(l.events == std::vector( + { + "start_array(18446744073709551615)", "string(foo)", + "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", + "boolean(false)", "start_object(18446744073709551615)", + "number_unsigned(1)", "end_object()", "parse_error(29)" + })); } SECTION("string") @@ -118,6 +248,17 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(s, nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(s, &l)); + CHECK(l.events.size() == 10); + CHECK(l.events == std::vector( + { + "start_array(18446744073709551615)", "string(foo)", + "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", + "boolean(false)", "start_object(18446744073709551615)", + "number_unsigned(1)", "end_object()", "parse_error(29)" + })); } SECTION("operator<<") @@ -159,6 +300,11 @@ TEST_CASE("deserialization") std::vector v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(v) == json(true)); CHECK(json::accept(v)); + + SaxEventLogger l; + CHECK(json::sax_parse(v, &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("from std::array") @@ -166,6 +312,11 @@ TEST_CASE("deserialization") std::array v { {'t', 'r', 'u', 'e'} }; CHECK(json::parse(v) == json(true)); CHECK(json::accept(v)); + + SaxEventLogger l; + CHECK(json::sax_parse(v, &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("from array") @@ -173,6 +324,11 @@ TEST_CASE("deserialization") uint8_t v[] = {'t', 'r', 'u', 'e'}; CHECK(json::parse(v) == json(true)); CHECK(json::accept(v)); + + SaxEventLogger l; + CHECK(json::sax_parse(v, &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("from chars") @@ -185,6 +341,12 @@ TEST_CASE("deserialization") v[4] = '\0'; CHECK(json::parse(v) == json(true)); CHECK(json::accept(v)); + + SaxEventLogger l; + CHECK(json::sax_parse(v, &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); + delete[] v; } @@ -193,6 +355,11 @@ TEST_CASE("deserialization") std::string v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(v) == json(true)); CHECK(json::accept(v)); + + SaxEventLogger l; + CHECK(json::sax_parse(v, &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("from std::initializer_list") @@ -200,6 +367,11 @@ TEST_CASE("deserialization") std::initializer_list v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(v) == json(true)); CHECK(json::accept(v)); + + SaxEventLogger l; + CHECK(json::sax_parse(v, &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("empty container") @@ -207,6 +379,11 @@ TEST_CASE("deserialization") std::vector v; CHECK_THROWS_AS(json::parse(v), json::parse_error&); CHECK(not json::accept(v)); + + SaxEventLogger l; + CHECK(not json::sax_parse(v, &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(1)"})); } } @@ -217,6 +394,12 @@ TEST_CASE("deserialization") std::vector v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(std::begin(v), std::end(v)) == json(true)); CHECK(json::accept(std::begin(v), std::end(v))); + + SaxEventLogger l; + CHECK(json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); + } SECTION("from std::array") @@ -224,6 +407,11 @@ TEST_CASE("deserialization") std::array v { {'t', 'r', 'u', 'e'} }; CHECK(json::parse(std::begin(v), std::end(v)) == json(true)); CHECK(json::accept(std::begin(v), std::end(v))); + + SaxEventLogger l; + CHECK(json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("from array") @@ -231,6 +419,11 @@ TEST_CASE("deserialization") uint8_t v[] = {'t', 'r', 'u', 'e'}; CHECK(json::parse(std::begin(v), std::end(v)) == json(true)); CHECK(json::accept(std::begin(v), std::end(v))); + + SaxEventLogger l; + CHECK(json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("from std::string") @@ -238,6 +431,11 @@ TEST_CASE("deserialization") std::string v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(std::begin(v), std::end(v)) == json(true)); CHECK(json::accept(std::begin(v), std::end(v))); + + SaxEventLogger l; + CHECK(json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("from std::initializer_list") @@ -245,6 +443,11 @@ TEST_CASE("deserialization") std::initializer_list v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(std::begin(v), std::end(v)) == json(true)); CHECK(json::accept(std::begin(v), std::end(v))); + + SaxEventLogger l; + CHECK(json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("from std::valarray") @@ -252,6 +455,11 @@ TEST_CASE("deserialization") std::valarray v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(std::begin(v), std::end(v)) == json(true)); CHECK(json::accept(std::begin(v), std::end(v))); + + SaxEventLogger l; + CHECK(json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("with empty range") @@ -259,6 +467,11 @@ TEST_CASE("deserialization") std::vector v; CHECK_THROWS_AS(json::parse(std::begin(v), std::end(v)), json::parse_error&); CHECK(not json::accept(std::begin(v), std::end(v))); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(1)"})); } } @@ -274,6 +487,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(10)"})); } SECTION("case 2") @@ -285,6 +503,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(11)"})); } SECTION("case 3") @@ -296,6 +519,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(18)"})); } SECTION("case 4") @@ -307,6 +535,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(18)"})); } SECTION("case 5") @@ -318,6 +551,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(3)"})); } SECTION("case 6") @@ -331,6 +569,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 7") @@ -342,6 +585,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 8") @@ -353,6 +601,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 9") @@ -364,6 +617,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 10") @@ -375,6 +633,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 11") @@ -386,6 +649,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 12") @@ -397,6 +665,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 13") @@ -408,6 +681,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 14") @@ -419,6 +697,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 15") @@ -430,6 +713,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 16") @@ -441,6 +729,15 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 3); + CHECK(l.events == std::vector( + { + "start_object(18446744073709551615)", "number_unsigned(11)", + "parse_error(7)" + })); } } } @@ -458,12 +755,34 @@ TEST_CASE("deserialization") CHECK_THROWS_AS(json::parse(std::istringstream(bom)), json::parse_error&); CHECK_THROWS_WITH(json::parse(std::istringstream(bom)), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + + SaxEventLogger l; + CHECK(not json::sax_parse(bom, &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector( + { + "parse_error(1)" + })); } SECTION("BOM and content") { CHECK(json::parse(bom + "1") == 1); CHECK(json::parse(std::istringstream(bom + "1")) == 1); + + SaxEventLogger l1, l2; + CHECK(json::sax_parse(std::istringstream(bom + "1"), &l1)); + CHECK(json::sax_parse(bom + "1", &l2)); + CHECK(l1.events.size() == 1); + CHECK(l1.events == std::vector( + { + "number_unsigned(1)" + })); + CHECK(l2.events.size() == 1); + CHECK(l2.events == std::vector( + { + "number_unsigned(1)" + })); } SECTION("2 byte of BOM") @@ -475,6 +794,20 @@ TEST_CASE("deserialization") CHECK_THROWS_AS(json::parse(std::istringstream(bom.substr(0, 2))), json::parse_error&); CHECK_THROWS_WITH(json::parse(std::istringstream(bom)), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + + SaxEventLogger l1, l2; + CHECK(not json::sax_parse(std::istringstream(bom.substr(0, 2)), &l1)); + CHECK(not json::sax_parse(bom.substr(0, 2), &l2)); + CHECK(l1.events.size() == 1); + CHECK(l1.events == std::vector( + { + "parse_error(1)" + })); + CHECK(l2.events.size() == 1); + CHECK(l2.events == std::vector( + { + "parse_error(1)" + })); } SECTION("1 byte of BOM") @@ -486,6 +819,20 @@ TEST_CASE("deserialization") CHECK_THROWS_AS(json::parse(std::istringstream(bom.substr(0, 1))), json::parse_error&); CHECK_THROWS_WITH(json::parse(std::istringstream(bom)), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + + SaxEventLogger l1, l2; + CHECK(not json::sax_parse(std::istringstream(bom.substr(0, 1)), &l1)); + CHECK(not json::sax_parse(bom.substr(0, 1), &l2)); + CHECK(l1.events.size() == 1); + CHECK(l1.events == std::vector( + { + "parse_error(1)" + })); + CHECK(l2.events.size() == 1); + CHECK(l2.events == std::vector( + { + "parse_error(1)" + })); } SECTION("variations") @@ -513,12 +860,28 @@ TEST_CASE("deserialization") // without any variation, we skip the BOM CHECK(json::parse(s + "null") == json()); CHECK(json::parse(std::istringstream(s + "null")) == json()); + + SaxEventLogger l; + CHECK(json::sax_parse(s + "null", &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector( + { + "null()" + })); } else { // any variation is an error CHECK_THROWS_AS(json::parse(s + "null"), json::parse_error&); CHECK_THROWS_AS(json::parse(std::istringstream(s + "null")), json::parse_error&); + + SaxEventLogger l; + CHECK(not json::sax_parse(s + "null", &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector( + { + "parse_error(1)" + })); } } }