From 374ebacc51cea921414e64806caab254569b23f2 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sat, 24 Feb 2018 18:04:07 +0100 Subject: [PATCH 01/43] :sparkles: added a SAX parser #971 --- include/nlohmann/detail/input/parser.hpp | 223 ++++++++++++++ include/nlohmann/json.hpp | 21 ++ single_include/nlohmann/json.hpp | 244 +++++++++++++++ test/src/unit-deserialization.cpp | 369 ++++++++++++++++++++++- 4 files changed, 854 insertions(+), 3 deletions(-) diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index 63e8541f..009ea994 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -52,6 +52,53 @@ class parser value }; + struct SAX + { + /// a null value was read + virtual bool null() = 0; + + /// a boolean value was read + virtual bool boolean(bool) = 0; + + /// an integer number was read + virtual bool number_integer(number_integer_t) = 0; + + /// an unsigned integer number was read + virtual bool number_unsigned(number_unsigned_t) = 0; + + /// a floating-point number was read + /// the string parameter contains the raw number value + virtual bool number_float(number_float_t, const std::string&) = 0; + + /// a string value was read + virtual bool string(const std::string&) = 0; + + /// the beginning of an object was read + /// binary formats may report the number of elements + virtual bool start_object(std::size_t elements) = 0; + + /// an object key was read + virtual bool key(const std::string&) = 0; + + /// the end of an object was read + virtual bool end_object() = 0; + + /// the beginning of an array was read + /// binary formats may report the number of elements + virtual bool start_array(std::size_t elements) = 0; + + /// the end of an array was read + virtual bool end_array() = 0; + + /// a binary value was read + /// examples are CBOR type 2 strings, MessagePack bin, and maybe UBJSON array + virtual bool binary(const std::vector& vec) = 0; + + /// a parse error 
occurred + /// the byte position and the last token are reported + virtual bool parse_error(int position, const std::string& last_token) = 0; + }; + using parser_callback_t = std::function; @@ -62,6 +109,10 @@ class parser : callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_) {} + parser(detail::input_adapter_t adapter, SAX* s) + : m_lexer(adapter), sax(s) + {} + /*! @brief public parser interface @@ -122,6 +173,14 @@ class parser return not strict or (get_token() == token_type::end_of_input); } + bool sax_parse() + { + // read first token + get_token(); + + return sax_parse_internal(); + } + private: /*! @brief the actual parser @@ -520,6 +579,168 @@ class parser } } + bool sax_parse_internal() + { + switch (last_token) + { + case token_type::begin_object: + { + if (not sax->start_object(-1)) + { + return false; + } + + // read next token + get_token(); + + // closing } -> we are done + if (last_token == token_type::end_object) + { + return sax->end_object(); + } + + // parse values + while (true) + { + // parse key + if (last_token != token_type::value_string) + { + if (not sax->key(m_lexer.move_string())) + { + return false; + } + } + + // parse separator (:) + get_token(); + if (last_token != token_type::name_separator) + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + + // parse value + get_token(); + if (not sax_parse_internal()) + { + return false; + } + + // comma -> next value + get_token(); + if (last_token == token_type::value_separator) + { + get_token(); + continue; + } + + // closing } + if (last_token == token_type::end_object) + { + return sax->end_object(); + } + else + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + } + } + + case token_type::begin_array: + { + if (not sax->start_array(-1)) + { + return false; + } + + // read next token + get_token(); + + // closing ] -> we are done + if (last_token == token_type::end_array) + { + return sax->end_array(); 
+ } + + // parse values + while (true) + { + // parse value + if (not sax_parse_internal()) + { + return false; + } + + // comma -> next value + get_token(); + if (last_token == token_type::value_separator) + { + get_token(); + continue; + } + + // closing ] + if (last_token == token_type::end_array) + { + return sax->end_array(); + } + else + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + } + } + + case token_type::value_float: + { + const auto res = m_lexer.get_number_float(); + + if (JSON_UNLIKELY(not std::isfinite(res))) + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + else + { + return sax->number_float(res, m_lexer.move_string()); + } + } + + case token_type::literal_false: + { + return sax->boolean(false); + } + + case token_type::literal_null: + { + return sax->null(); + } + + case token_type::literal_true: + { + return sax->boolean(true); + } + + case token_type::value_integer: + { + return sax->number_integer(m_lexer.get_number_integer()); + } + + case token_type::value_string: + { + return sax->string(m_lexer.move_string()); + } + + case token_type::value_unsigned: + { + return sax->number_unsigned(m_lexer.get_number_unsigned()); + } + + default: // the last token was unexpected + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + } + } + /// get next token from lexer token_type get_token() { @@ -584,6 +805,8 @@ class parser token_type expected = token_type::uninitialized; /// whether to throw exceptions in case of errors const bool allow_exceptions = true; + /// associated SAX parse event receiver + SAX* sax = nullptr; }; } } diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index f92729f3..e43d37e7 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -1054,6 +1054,8 @@ class basic_json */ using parse_event_t = typename parser::parse_event_t; + using SAX = typename parser::SAX; + /*! 
@brief per-element parser callback type @@ -5925,6 +5927,16 @@ class basic_json return parser(i).accept(true); } + static bool sax_parse(detail::input_adapter i, SAX* sax) + { + return parser(i, sax).sax_parse(); + } + + static bool sax_parse(detail::input_adapter& i, SAX* sax) + { + return parser(i, sax).sax_parse(); + } + /*! @brief deserialize from an iterator range with contiguous storage @@ -5994,6 +6006,15 @@ class basic_json return parser(detail::input_adapter(first, last)).accept(true); } + template::iterator_category>::value, int>::type = 0> + static bool sax_parse(IteratorType first, IteratorType last, SAX* sax) + { + return parser(detail::input_adapter(first, last), sax).sax_parse(); + } + /*! @brief deserialize from stream @deprecated This stream operator is deprecated and will be removed in diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 3dcb834b..53b03421 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3175,6 +3175,53 @@ class parser value }; + struct SAX + { + /// a null value was read + virtual bool null() = 0; + + /// a boolean value was read + virtual bool boolean(bool) = 0; + + /// an integer number was read + virtual bool number_integer(number_integer_t) = 0; + + /// an unsigned integer number was read + virtual bool number_unsigned(number_unsigned_t) = 0; + + /// a floating-point number was read + /// the string parameter contains the raw number value + virtual bool number_float(number_float_t, const std::string&) = 0; + + /// a string value was read + virtual bool string(const std::string&) = 0; + + /// the beginning of an object was read + /// binary formats may report the number of elements + virtual bool start_object(std::size_t elements) = 0; + + /// an object key was read + virtual bool key(const std::string&) = 0; + + /// the end of an object was read + virtual bool end_object() = 0; + + /// the beginning of an array was read + /// binary formats may report 
the number of elements + virtual bool start_array(std::size_t elements) = 0; + + /// the end of an array was read + virtual bool end_array() = 0; + + /// a binary value was read + /// examples are CBOR type 2 strings, MessagePack bin, and maybe UBJSON array + virtual bool binary(const std::vector& vec) = 0; + + /// a parse error occurred + /// the byte position and the last token are reported + virtual bool parse_error(int position, const std::string& last_token) = 0; + }; + using parser_callback_t = std::function; @@ -3185,6 +3232,10 @@ class parser : callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_) {} + parser(detail::input_adapter_t adapter, SAX* s) + : m_lexer(adapter), sax(s) + {} + /*! @brief public parser interface @@ -3245,6 +3296,14 @@ class parser return not strict or (get_token() == token_type::end_of_input); } + bool sax_parse() + { + // read first token + get_token(); + + return sax_parse_internal(); + } + private: /*! @brief the actual parser @@ -3643,6 +3702,168 @@ class parser } } + bool sax_parse_internal() + { + switch (last_token) + { + case token_type::begin_object: + { + if (not sax->start_object(-1)) + { + return false; + } + + // read next token + get_token(); + + // closing } -> we are done + if (last_token == token_type::end_object) + { + return sax->end_object(); + } + + // parse values + while (true) + { + // parse key + if (last_token != token_type::value_string) + { + if (not sax->key(m_lexer.move_string())) + { + return false; + } + } + + // parse separator (:) + get_token(); + if (last_token != token_type::name_separator) + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + + // parse value + get_token(); + if (not sax_parse_internal()) + { + return false; + } + + // comma -> next value + get_token(); + if (last_token == token_type::value_separator) + { + get_token(); + continue; + } + + // closing } + if (last_token == token_type::end_object) + { + return sax->end_object(); + } + 
else + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + } + } + + case token_type::begin_array: + { + if (not sax->start_array(-1)) + { + return false; + } + + // read next token + get_token(); + + // closing ] -> we are done + if (last_token == token_type::end_array) + { + return sax->end_array(); + } + + // parse values + while (true) + { + // parse value + if (not sax_parse_internal()) + { + return false; + } + + // comma -> next value + get_token(); + if (last_token == token_type::value_separator) + { + get_token(); + continue; + } + + // closing ] + if (last_token == token_type::end_array) + { + return sax->end_array(); + } + else + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + } + } + + case token_type::value_float: + { + const auto res = m_lexer.get_number_float(); + + if (JSON_UNLIKELY(not std::isfinite(res))) + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + else + { + return sax->number_float(res, m_lexer.move_string()); + } + } + + case token_type::literal_false: + { + return sax->boolean(false); + } + + case token_type::literal_null: + { + return sax->null(); + } + + case token_type::literal_true: + { + return sax->boolean(true); + } + + case token_type::value_integer: + { + return sax->number_integer(m_lexer.get_number_integer()); + } + + case token_type::value_string: + { + return sax->string(m_lexer.move_string()); + } + + case token_type::value_unsigned: + { + return sax->number_unsigned(m_lexer.get_number_unsigned()); + } + + default: // the last token was unexpected + { + return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + } + } + } + /// get next token from lexer token_type get_token() { @@ -3707,6 +3928,8 @@ class parser token_type expected = token_type::uninitialized; /// whether to throw exceptions in case of errors const bool allow_exceptions = true; + /// associated SAX parse event receiver + SAX* 
sax = nullptr; }; } } @@ -10652,6 +10875,8 @@ class basic_json */ using parse_event_t = typename parser::parse_event_t; + using SAX = typename parser::SAX; + /*! @brief per-element parser callback type @@ -15523,6 +15748,16 @@ class basic_json return parser(i).accept(true); } + static bool sax_parse(detail::input_adapter i, SAX* sax) + { + return parser(i, sax).sax_parse(); + } + + static bool sax_parse(detail::input_adapter& i, SAX* sax) + { + return parser(i, sax).sax_parse(); + } + /*! @brief deserialize from an iterator range with contiguous storage @@ -15592,6 +15827,15 @@ class basic_json return parser(detail::input_adapter(first, last)).accept(true); } + template::iterator_category>::value, int>::type = 0> + static bool sax_parse(IteratorType first, IteratorType last, SAX* sax) + { + return parser(detail::input_adapter(first, last), sax).sax_parse(); + } + /*! @brief deserialize from stream @deprecated This stream operator is deprecated and will be removed in diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index 6e46abe3..fd42af2f 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -34,18 +34,114 @@ using nlohmann::json; #include #include +class SaxEventLogger : public nlohmann::json::SAX +{ + public: + bool null() override + { + events.push_back("null()"); + return true; + } + + bool boolean(bool val) override + { + events.push_back(val ? 
"boolean(true)" : "boolean(false)"); + return true; + } + + bool number_integer(json::number_integer_t val) override + { + events.push_back("number_integer(" + std::to_string(val) + ")"); + return true; + } + + bool number_unsigned(json::number_unsigned_t val) override + { + events.push_back("number_unsigned(" + std::to_string(val) + ")"); + return true; + } + + bool number_float(json::number_float_t val, const std::string& s) override + { + events.push_back("number_float(" + s + ")"); + return true; + } + + bool string(const std::string& val) override + { + events.push_back("string(" + val + ")"); + return true; + } + + bool start_object(std::size_t elements) override + { + events.push_back("start_object(" + std::to_string(elements) + ")"); + return true; + } + + bool key(const std::string& val) override + { + events.push_back("key(" + val + ")"); + return true; + } + + bool end_object()override + { + events.push_back("end_object()"); + return true; + } + + bool start_array(std::size_t elements) override + { + events.push_back("start_array(" + std::to_string(elements) + ")"); + return true; + } + + bool end_array() override + { + events.push_back("end_array()"); + return true; + } + + bool binary(const std::vector& vec) override + { + events.push_back("binary()"); + return true; + } + + bool parse_error(int position, const std::string& last_token) override + { + events.push_back("parse_error(" + std::to_string(position) + ")"); + return false; + } + + std::vector events; +}; + TEST_CASE("deserialization") { SECTION("successful deserialization") { SECTION("stream") { - std::stringstream ss1, ss2; + std::stringstream ss1, ss2, ss3; ss1 << "[\"foo\",1,2,3,false,{\"one\":1}]"; ss2 << "[\"foo\",1,2,3,false,{\"one\":1}]"; + ss3 << "[\"foo\",1,2,3,false,{\"one\":1}]"; json j = json::parse(ss1); CHECK(json::accept(ss2)); CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}})); + + SaxEventLogger l; + CHECK(json::sax_parse(ss3, &l)); + CHECK(l.events.size() == 10); + 
CHECK(l.events == std::vector( + { + "start_array(18446744073709551615)", "string(foo)", + "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", + "boolean(false)", "start_object(18446744073709551615)", + "number_unsigned(1)", "end_object()", "end_array()" + })); } SECTION("string literal") @@ -54,6 +150,17 @@ TEST_CASE("deserialization") json j = json::parse(s); CHECK(json::accept(s)); CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}})); + + SaxEventLogger l; + CHECK(json::sax_parse(s, &l)); + CHECK(l.events.size() == 10); + CHECK(l.events == std::vector( + { + "start_array(18446744073709551615)", "string(foo)", + "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", + "boolean(false)", "start_object(18446744073709551615)", + "number_unsigned(1)", "end_object()", "end_array()" + })); } SECTION("string_t") @@ -62,6 +169,17 @@ TEST_CASE("deserialization") json j = json::parse(s); CHECK(json::accept(s)); CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}})); + + SaxEventLogger l; + CHECK(json::sax_parse(s, &l)); + CHECK(l.events.size() == 10); + CHECK(l.events == std::vector( + { + "start_array(18446744073709551615)", "string(foo)", + "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", + "boolean(false)", "start_object(18446744073709551615)", + "number_unsigned(1)", "end_object()", "end_array()" + })); } SECTION("operator<<") @@ -92,19 +210,31 @@ TEST_CASE("deserialization") { SECTION("stream") { - std::stringstream ss1, ss2, ss3, ss4; + std::stringstream ss1, ss2, ss3, ss4, ss5; ss1 << "[\"foo\",1,2,3,false,{\"one\":1}"; ss2 << "[\"foo\",1,2,3,false,{\"one\":1}"; ss3 << "[\"foo\",1,2,3,false,{\"one\":1}"; ss4 << "[\"foo\",1,2,3,false,{\"one\":1}"; + ss5 << "[\"foo\",1,2,3,false,{\"one\":1}"; CHECK_THROWS_AS(json::parse(ss1), json::parse_error&); CHECK_THROWS_WITH(json::parse(ss2), "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'"); CHECK(not json::accept(ss3)); json 
j_error; - CHECK_NOTHROW(j_error = json::parse(ss1, nullptr, false)); + CHECK_NOTHROW(j_error = json::parse(ss4, nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(ss5, &l)); + CHECK(l.events.size() == 10); + CHECK(l.events == std::vector( + { + "start_array(18446744073709551615)", "string(foo)", + "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", + "boolean(false)", "start_object(18446744073709551615)", + "number_unsigned(1)", "end_object()", "parse_error(29)" + })); } SECTION("string") @@ -118,6 +248,17 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(s, nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(s, &l)); + CHECK(l.events.size() == 10); + CHECK(l.events == std::vector( + { + "start_array(18446744073709551615)", "string(foo)", + "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", + "boolean(false)", "start_object(18446744073709551615)", + "number_unsigned(1)", "end_object()", "parse_error(29)" + })); } SECTION("operator<<") @@ -159,6 +300,11 @@ TEST_CASE("deserialization") std::vector v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(v) == json(true)); CHECK(json::accept(v)); + + SaxEventLogger l; + CHECK(json::sax_parse(v, &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("from std::array") @@ -166,6 +312,11 @@ TEST_CASE("deserialization") std::array v { {'t', 'r', 'u', 'e'} }; CHECK(json::parse(v) == json(true)); CHECK(json::accept(v)); + + SaxEventLogger l; + CHECK(json::sax_parse(v, &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("from array") @@ -173,6 +324,11 @@ TEST_CASE("deserialization") uint8_t v[] = {'t', 'r', 'u', 'e'}; CHECK(json::parse(v) == json(true)); CHECK(json::accept(v)); + + SaxEventLogger l; + CHECK(json::sax_parse(v, &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == 
std::vector({"boolean(true)"})); } SECTION("from chars") @@ -185,6 +341,12 @@ TEST_CASE("deserialization") v[4] = '\0'; CHECK(json::parse(v) == json(true)); CHECK(json::accept(v)); + + SaxEventLogger l; + CHECK(json::sax_parse(v, &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); + delete[] v; } @@ -193,6 +355,11 @@ TEST_CASE("deserialization") std::string v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(v) == json(true)); CHECK(json::accept(v)); + + SaxEventLogger l; + CHECK(json::sax_parse(v, &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("from std::initializer_list") @@ -200,6 +367,11 @@ TEST_CASE("deserialization") std::initializer_list v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(v) == json(true)); CHECK(json::accept(v)); + + SaxEventLogger l; + CHECK(json::sax_parse(v, &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("empty container") @@ -207,6 +379,11 @@ TEST_CASE("deserialization") std::vector v; CHECK_THROWS_AS(json::parse(v), json::parse_error&); CHECK(not json::accept(v)); + + SaxEventLogger l; + CHECK(not json::sax_parse(v, &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(1)"})); } } @@ -217,6 +394,12 @@ TEST_CASE("deserialization") std::vector v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(std::begin(v), std::end(v)) == json(true)); CHECK(json::accept(std::begin(v), std::end(v))); + + SaxEventLogger l; + CHECK(json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); + } SECTION("from std::array") @@ -224,6 +407,11 @@ TEST_CASE("deserialization") std::array v { {'t', 'r', 'u', 'e'} }; CHECK(json::parse(std::begin(v), std::end(v)) == json(true)); CHECK(json::accept(std::begin(v), std::end(v))); + + SaxEventLogger l; + CHECK(json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + 
CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("from array") @@ -231,6 +419,11 @@ TEST_CASE("deserialization") uint8_t v[] = {'t', 'r', 'u', 'e'}; CHECK(json::parse(std::begin(v), std::end(v)) == json(true)); CHECK(json::accept(std::begin(v), std::end(v))); + + SaxEventLogger l; + CHECK(json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("from std::string") @@ -238,6 +431,11 @@ TEST_CASE("deserialization") std::string v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(std::begin(v), std::end(v)) == json(true)); CHECK(json::accept(std::begin(v), std::end(v))); + + SaxEventLogger l; + CHECK(json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("from std::initializer_list") @@ -245,6 +443,11 @@ TEST_CASE("deserialization") std::initializer_list v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(std::begin(v), std::end(v)) == json(true)); CHECK(json::accept(std::begin(v), std::end(v))); + + SaxEventLogger l; + CHECK(json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("from std::valarray") @@ -252,6 +455,11 @@ TEST_CASE("deserialization") std::valarray v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(std::begin(v), std::end(v)) == json(true)); CHECK(json::accept(std::begin(v), std::end(v))); + + SaxEventLogger l; + CHECK(json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); } SECTION("with empty range") @@ -259,6 +467,11 @@ TEST_CASE("deserialization") std::vector v; CHECK_THROWS_AS(json::parse(std::begin(v), std::end(v)), json::parse_error&); CHECK(not json::accept(std::begin(v), std::end(v))); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == 
std::vector({"parse_error(1)"})); } } @@ -274,6 +487,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(10)"})); } SECTION("case 2") @@ -285,6 +503,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(11)"})); } SECTION("case 3") @@ -296,6 +519,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(18)"})); } SECTION("case 4") @@ -307,6 +535,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(18)"})); } SECTION("case 5") @@ -318,6 +551,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(3)"})); } SECTION("case 6") @@ -331,6 +569,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), 
nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 7") @@ -342,6 +585,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 8") @@ -353,6 +601,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 9") @@ -364,6 +617,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 10") @@ -375,6 +633,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 11") @@ -386,6 +649,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + 
CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 12") @@ -397,6 +665,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 13") @@ -408,6 +681,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 14") @@ -419,6 +697,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 15") @@ -430,6 +713,11 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"parse_error(4)"})); } SECTION("case 16") @@ -441,6 +729,15 @@ TEST_CASE("deserialization") json j_error; CHECK_NOTHROW(j_error = json::parse(std::begin(v), std::end(v), nullptr, false)); CHECK(j_error.is_discarded()); + + SaxEventLogger l; + CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); + CHECK(l.events.size() == 3); + CHECK(l.events == std::vector( + { + "start_object(18446744073709551615)", "number_unsigned(11)", + "parse_error(7)" + })); } } } @@ -458,12 +755,34 @@ 
TEST_CASE("deserialization") CHECK_THROWS_AS(json::parse(std::istringstream(bom)), json::parse_error&); CHECK_THROWS_WITH(json::parse(std::istringstream(bom)), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + + SaxEventLogger l; + CHECK(not json::sax_parse(bom, &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector( + { + "parse_error(1)" + })); } SECTION("BOM and content") { CHECK(json::parse(bom + "1") == 1); CHECK(json::parse(std::istringstream(bom + "1")) == 1); + + SaxEventLogger l1, l2; + CHECK(json::sax_parse(std::istringstream(bom + "1"), &l1)); + CHECK(json::sax_parse(bom + "1", &l2)); + CHECK(l1.events.size() == 1); + CHECK(l1.events == std::vector( + { + "number_unsigned(1)" + })); + CHECK(l2.events.size() == 1); + CHECK(l2.events == std::vector( + { + "number_unsigned(1)" + })); } SECTION("2 byte of BOM") @@ -475,6 +794,20 @@ TEST_CASE("deserialization") CHECK_THROWS_AS(json::parse(std::istringstream(bom.substr(0, 2))), json::parse_error&); CHECK_THROWS_WITH(json::parse(std::istringstream(bom)), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + + SaxEventLogger l1, l2; + CHECK(not json::sax_parse(std::istringstream(bom.substr(0, 2)), &l1)); + CHECK(not json::sax_parse(bom.substr(0, 2), &l2)); + CHECK(l1.events.size() == 1); + CHECK(l1.events == std::vector( + { + "parse_error(1)" + })); + CHECK(l2.events.size() == 1); + CHECK(l2.events == std::vector( + { + "parse_error(1)" + })); } SECTION("1 byte of BOM") @@ -486,6 +819,20 @@ TEST_CASE("deserialization") CHECK_THROWS_AS(json::parse(std::istringstream(bom.substr(0, 1))), json::parse_error&); CHECK_THROWS_WITH(json::parse(std::istringstream(bom)), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + + SaxEventLogger l1, l2; + CHECK(not 
json::sax_parse(std::istringstream(bom.substr(0, 1)), &l1)); + CHECK(not json::sax_parse(bom.substr(0, 1), &l2)); + CHECK(l1.events.size() == 1); + CHECK(l1.events == std::vector( + { + "parse_error(1)" + })); + CHECK(l2.events.size() == 1); + CHECK(l2.events == std::vector( + { + "parse_error(1)" + })); } SECTION("variations") @@ -513,12 +860,28 @@ TEST_CASE("deserialization") // without any variation, we skip the BOM CHECK(json::parse(s + "null") == json()); CHECK(json::parse(std::istringstream(s + "null")) == json()); + + SaxEventLogger l; + CHECK(json::sax_parse(s + "null", &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector( + { + "null()" + })); } else { // any variation is an error CHECK_THROWS_AS(json::parse(s + "null"), json::parse_error&); CHECK_THROWS_AS(json::parse(std::istringstream(s + "null")), json::parse_error&); + + SaxEventLogger l; + CHECK(not json::sax_parse(s + "null", &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector( + { + "parse_error(1)" + })); } } } From ac230e8b4b0428ee05abf01136288a69939a7a9b Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 25 Feb 2018 10:44:47 +0100 Subject: [PATCH 02/43] :hammer: fixed test cases to be more robust --- test/src/unit-deserialization.cpp | 40 +++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index fd42af2f..64cb243d 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -75,7 +75,14 @@ class SaxEventLogger : public nlohmann::json::SAX bool start_object(std::size_t elements) override { - events.push_back("start_object(" + std::to_string(elements) + ")"); + if (elements == -1) + { + events.push_back("start_object()"); + } + else + { + events.push_back("start_object(" + std::to_string(elements) + ")"); + } return true; } @@ -93,7 +100,14 @@ class SaxEventLogger : public nlohmann::json::SAX bool start_array(std::size_t 
elements) override { - events.push_back("start_array(" + std::to_string(elements) + ")"); + if (elements == -1) + { + events.push_back("start_array()"); + } + else + { + events.push_back("start_array(" + std::to_string(elements) + ")"); + } return true; } @@ -137,9 +151,9 @@ TEST_CASE("deserialization") CHECK(l.events.size() == 10); CHECK(l.events == std::vector( { - "start_array(18446744073709551615)", "string(foo)", + "start_array()", "string(foo)", "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", - "boolean(false)", "start_object(18446744073709551615)", + "boolean(false)", "start_object()", "number_unsigned(1)", "end_object()", "end_array()" })); } @@ -156,9 +170,9 @@ TEST_CASE("deserialization") CHECK(l.events.size() == 10); CHECK(l.events == std::vector( { - "start_array(18446744073709551615)", "string(foo)", + "start_array()", "string(foo)", "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", - "boolean(false)", "start_object(18446744073709551615)", + "boolean(false)", "start_object()", "number_unsigned(1)", "end_object()", "end_array()" })); } @@ -175,9 +189,9 @@ TEST_CASE("deserialization") CHECK(l.events.size() == 10); CHECK(l.events == std::vector( { - "start_array(18446744073709551615)", "string(foo)", + "start_array()", "string(foo)", "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", - "boolean(false)", "start_object(18446744073709551615)", + "boolean(false)", "start_object()", "number_unsigned(1)", "end_object()", "end_array()" })); } @@ -230,9 +244,9 @@ TEST_CASE("deserialization") CHECK(l.events.size() == 10); CHECK(l.events == std::vector( { - "start_array(18446744073709551615)", "string(foo)", + "start_array()", "string(foo)", "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", - "boolean(false)", "start_object(18446744073709551615)", + "boolean(false)", "start_object()", "number_unsigned(1)", "end_object()", "parse_error(29)" })); } @@ -254,9 +268,9 @@ TEST_CASE("deserialization") 
CHECK(l.events.size() == 10); CHECK(l.events == std::vector( { - "start_array(18446744073709551615)", "string(foo)", + "start_array()", "string(foo)", "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", - "boolean(false)", "start_object(18446744073709551615)", + "boolean(false)", "start_object()", "number_unsigned(1)", "end_object()", "parse_error(29)" })); } @@ -735,7 +749,7 @@ TEST_CASE("deserialization") CHECK(l.events.size() == 3); CHECK(l.events == std::vector( { - "start_object(18446744073709551615)", "number_unsigned(11)", + "start_object()", "number_unsigned(11)", "parse_error(7)" })); } From 922f7a3d0eca33fc359ac2b64a258227da44aefb Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 25 Feb 2018 14:21:30 +0100 Subject: [PATCH 03/43] :white_check_mark: added more tests for SAX parsing --- test/src/unit-class_parser.cpp | 109 ++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index 3e309469..27c1ee8d 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -34,6 +34,106 @@ using nlohmann::json; #include +class SaxEventLogger : public nlohmann::json::SAX +{ + public: + bool null() override + { + events.push_back("null()"); + return true; + } + + bool boolean(bool val) override + { + events.push_back(val ? 
"boolean(true)" : "boolean(false)"); + return true; + } + + bool number_integer(json::number_integer_t val) override + { + events.push_back("number_integer(" + std::to_string(val) + ")"); + return true; + } + + bool number_unsigned(json::number_unsigned_t val) override + { + events.push_back("number_unsigned(" + std::to_string(val) + ")"); + return true; + } + + bool number_float(json::number_float_t val, const std::string& s) override + { + events.push_back("number_float(" + s + ")"); + return true; + } + + bool string(const std::string& val) override + { + events.push_back("string(" + val + ")"); + return true; + } + + bool start_object(std::size_t elements) override + { + if (elements == -1) + { + events.push_back("start_object()"); + } + else + { + events.push_back("start_object(" + std::to_string(elements) + ")"); + } + return true; + } + + bool key(const std::string& val) override + { + events.push_back("key(" + val + ")"); + return true; + } + + bool end_object()override + { + events.push_back("end_object()"); + return true; + } + + bool start_array(std::size_t elements) override + { + if (elements == -1) + { + events.push_back("start_array()"); + } + else + { + events.push_back("start_array(" + std::to_string(elements) + ")"); + } + return true; + } + + bool end_array() override + { + events.push_back("end_array()"); + return true; + } + + bool binary(const std::vector& vec) override + { + events.push_back("binary()"); + return true; + } + + bool parse_error(int position, const std::string& last_token) override + { + errored = true; + events.push_back("parse_error(" + std::to_string(position) + ")"); + return false; + } + + std::vector events; + bool errored = false; +}; + json parser_helper(const std::string& s); bool accept_helper(const std::string& s); @@ -53,6 +153,8 @@ json parser_helper(const std::string& s) bool accept_helper(const std::string& s) { + CAPTURE(s); + // 1. 
parse s without exceptions json j; CHECK_NOTHROW(json::parser(nlohmann::detail::input_adapter(s), nullptr, false).parse(true, j)); @@ -64,7 +166,12 @@ bool accept_helper(const std::string& s) // 3. check if both approaches come to the same result CHECK(ok_noexcept == ok_accept); - // 4. return result + // 4. parse with SAX (compare with relaxed accept result) + SaxEventLogger el; + CHECK_NOTHROW(json::sax_parse(s, &el)); + CHECK(json::parser(nlohmann::detail::input_adapter(s)).accept(false) == not el.errored); + + // 5. return result return ok_accept; } From 8c7f46f7d0291fe2d022dc5c2eb77bab213ed8eb Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 25 Feb 2018 17:10:30 +0100 Subject: [PATCH 04/43] :hammer: removed a logic error and improved coverage --- include/nlohmann/detail/input/parser.hpp | 30 +++-- include/nlohmann/json.hpp | 3 +- single_include/nlohmann/json.hpp | 33 ++++-- test/src/unit-deserialization.cpp | 143 ++++++++++++++++++----- 4 files changed, 156 insertions(+), 53 deletions(-) diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index 009ea994..1cd6868f 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -594,7 +594,7 @@ class parser get_token(); // closing } -> we are done - if (last_token == token_type::end_object) + if (JSON_UNLIKELY(last_token == token_type::end_object)) { return sax->end_object(); } @@ -603,7 +603,12 @@ class parser while (true) { // parse key - if (last_token != token_type::value_string) + if (JSON_UNLIKELY(last_token != token_type::value_string)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string()); + } + else { if (not sax->key(m_lexer.move_string())) { @@ -613,9 +618,10 @@ class parser // parse separator (:) get_token(); - if (last_token != token_type::name_separator) + if (JSON_UNLIKELY(last_token != token_type::name_separator)) { - return sax->parse_error(m_lexer.get_position(), 
m_lexer.get_token_string()); + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string()); } // parse value @@ -634,13 +640,14 @@ class parser } // closing } - if (last_token == token_type::end_object) + if (JSON_LIKELY(last_token == token_type::end_object)) { return sax->end_object(); } else { - return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string()); } } } @@ -679,13 +686,14 @@ class parser } // closing ] - if (last_token == token_type::end_array) + if (JSON_LIKELY(last_token == token_type::end_array)) { return sax->end_array(); } else { - return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string()); } } } @@ -696,7 +704,8 @@ class parser if (JSON_UNLIKELY(not std::isfinite(res))) { - return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string()); } else { @@ -736,7 +745,8 @@ class parser default: // the last token was unexpected { - return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string()); } } } diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index e43d37e7..9515d29e 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -1054,8 +1054,6 @@ class basic_json */ using parse_event_t = typename parser::parse_event_t; - using SAX = typename parser::SAX; - /*! 
@brief per-element parser callback type @@ -1107,6 +1105,7 @@ class basic_json */ using parser_callback_t = typename parser::parser_callback_t; + using SAX = typename parser::SAX; ////////////////// // constructors // diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 53b03421..79581142 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3717,7 +3717,7 @@ class parser get_token(); // closing } -> we are done - if (last_token == token_type::end_object) + if (JSON_UNLIKELY(last_token == token_type::end_object)) { return sax->end_object(); } @@ -3726,7 +3726,12 @@ class parser while (true) { // parse key - if (last_token != token_type::value_string) + if (JSON_UNLIKELY(last_token != token_type::value_string)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string()); + } + else { if (not sax->key(m_lexer.move_string())) { @@ -3736,9 +3741,10 @@ class parser // parse separator (:) get_token(); - if (last_token != token_type::name_separator) + if (JSON_UNLIKELY(last_token != token_type::name_separator)) { - return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string()); } // parse value @@ -3757,13 +3763,14 @@ class parser } // closing } - if (last_token == token_type::end_object) + if (JSON_LIKELY(last_token == token_type::end_object)) { return sax->end_object(); } else { - return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string()); } } } @@ -3802,13 +3809,14 @@ class parser } // closing ] - if (last_token == token_type::end_array) + if (JSON_LIKELY(last_token == token_type::end_array)) { return sax->end_array(); } else { - return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string()); } } 
} @@ -3819,7 +3827,8 @@ class parser if (JSON_UNLIKELY(not std::isfinite(res))) { - return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string()); } else { @@ -3859,7 +3868,8 @@ class parser default: // the last token was unexpected { - return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string()); + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string()); } } } @@ -10875,8 +10885,6 @@ class basic_json */ using parse_event_t = typename parser::parse_event_t; - using SAX = typename parser::SAX; - /*! @brief per-element parser callback type @@ -10928,6 +10936,7 @@ class basic_json */ using parser_callback_t = typename parser::parser_callback_t; + using SAX = typename parser::SAX; ////////////////// // constructors // diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index 64cb243d..9f0bc175 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -34,9 +34,8 @@ using nlohmann::json; #include #include -class SaxEventLogger : public nlohmann::json::SAX +struct SaxEventLogger : public nlohmann::json::SAX { - public: bool null() override { events.push_back("null()"); @@ -132,6 +131,47 @@ class SaxEventLogger : public nlohmann::json::SAX std::vector events; }; +struct SaxEventLoggerExitAfterStartObject : public SaxEventLogger +{ + bool start_object(std::size_t elements) override + { + if (elements == -1) + { + events.push_back("start_object()"); + } + else + { + events.push_back("start_object(" + std::to_string(elements) + ")"); + } + return false; + } +}; + +struct SaxEventLoggerExitAfterKey : public SaxEventLogger +{ + bool key(const std::string& val) override + { + events.push_back("key(" + val + ")"); + return false; + } +}; + +struct SaxEventLoggerExitAfterStartArray : public SaxEventLogger +{ + bool start_array(std::size_t elements) override + { + if (elements == -1) + { + 
events.push_back("start_array()"); + } + else + { + events.push_back("start_array(" + std::to_string(elements) + ")"); + } + return false; + } +}; + TEST_CASE("deserialization") { SECTION("successful deserialization") @@ -148,13 +188,13 @@ TEST_CASE("deserialization") SaxEventLogger l; CHECK(json::sax_parse(ss3, &l)); - CHECK(l.events.size() == 10); + CHECK(l.events.size() == 11); CHECK(l.events == std::vector( { - "start_array()", "string(foo)", - "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", - "boolean(false)", "start_object()", - "number_unsigned(1)", "end_object()", "end_array()" + "start_array()", "string(foo)", "number_unsigned(1)", + "number_unsigned(2)", "number_unsigned(3)", "boolean(false)", + "start_object()", "key(one)", "number_unsigned(1)", + "end_object()", "end_array()" })); } @@ -167,13 +207,13 @@ TEST_CASE("deserialization") SaxEventLogger l; CHECK(json::sax_parse(s, &l)); - CHECK(l.events.size() == 10); + CHECK(l.events.size() == 11); CHECK(l.events == std::vector( { - "start_array()", "string(foo)", - "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", - "boolean(false)", "start_object()", - "number_unsigned(1)", "end_object()", "end_array()" + "start_array()", "string(foo)", "number_unsigned(1)", + "number_unsigned(2)", "number_unsigned(3)", "boolean(false)", + "start_object()", "key(one)", "number_unsigned(1)", + "end_object()", "end_array()" })); } @@ -186,13 +226,13 @@ TEST_CASE("deserialization") SaxEventLogger l; CHECK(json::sax_parse(s, &l)); - CHECK(l.events.size() == 10); + CHECK(l.events.size() == 11); CHECK(l.events == std::vector( { - "start_array()", "string(foo)", - "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", - "boolean(false)", "start_object()", - "number_unsigned(1)", "end_object()", "end_array()" + "start_array()", "string(foo)", "number_unsigned(1)", + "number_unsigned(2)", "number_unsigned(3)", "boolean(false)", + "start_object()", "key(one)", "number_unsigned(1)", + 
"end_object()", "end_array()" })); } @@ -241,13 +281,13 @@ TEST_CASE("deserialization") SaxEventLogger l; CHECK(not json::sax_parse(ss5, &l)); - CHECK(l.events.size() == 10); + CHECK(l.events.size() == 11); CHECK(l.events == std::vector( { - "start_array()", "string(foo)", - "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", - "boolean(false)", "start_object()", - "number_unsigned(1)", "end_object()", "parse_error(29)" + "start_array()", "string(foo)", "number_unsigned(1)", + "number_unsigned(2)", "number_unsigned(3)", "boolean(false)", + "start_object()", "key(one)", "number_unsigned(1)", + "end_object()", "parse_error(29)" })); } @@ -265,13 +305,13 @@ TEST_CASE("deserialization") SaxEventLogger l; CHECK(not json::sax_parse(s, &l)); - CHECK(l.events.size() == 10); + CHECK(l.events.size() == 11); CHECK(l.events == std::vector( { - "start_array()", "string(foo)", - "number_unsigned(1)", "number_unsigned(2)", "number_unsigned(3)", - "boolean(false)", "start_object()", - "number_unsigned(1)", "end_object()", "parse_error(29)" + "start_array()", "string(foo)", "number_unsigned(1)", + "number_unsigned(2)", "number_unsigned(3)", "boolean(false)", + "start_object()", "key(one)", "number_unsigned(1)", + "end_object()", "parse_error(29)" })); } @@ -746,10 +786,10 @@ TEST_CASE("deserialization") SaxEventLogger l; CHECK(not json::sax_parse(std::begin(v), std::end(v), &l)); - CHECK(l.events.size() == 3); + CHECK(l.events.size() == 4); CHECK(l.events == std::vector( { - "start_object()", "number_unsigned(11)", + "start_object()", "key()", "number_unsigned(11)", "parse_error(7)" })); } @@ -912,4 +952,49 @@ TEST_CASE("deserialization") CHECK(j == 456); } } + + SECTION("SAX and early abort") + { + std::string s = "[1, [\"string\", 43.12], null, {\"key1\": true, \"key2\": false}]"; + + SaxEventLogger default_logger; + SaxEventLoggerExitAfterStartObject exit_after_start_object; + SaxEventLoggerExitAfterKey exit_after_key; + SaxEventLoggerExitAfterStartArray 
exit_after_start_array; + + json::sax_parse(s, &default_logger); + CHECK(default_logger.events.size() == 14); + CHECK(default_logger.events == std::vector( + { + "start_array()", "number_unsigned(1)", "start_array()", + "string(string)", "number_float(43.12)", "end_array()", "null()", + "start_object()", "key(key1)", "boolean(true)", "key(key2)", + "boolean(false)", "end_object()", "end_array()" + })); + + json::sax_parse(s, &exit_after_start_object); + CHECK(exit_after_start_object.events.size() == 8); + CHECK(exit_after_start_object.events == std::vector( + { + "start_array()", "number_unsigned(1)", "start_array()", + "string(string)", "number_float(43.12)", "end_array()", "null()", + "start_object()" + })); + + json::sax_parse(s, &exit_after_key); + CHECK(exit_after_key.events.size() == 9); + CHECK(exit_after_key.events == std::vector( + { + "start_array()", "number_unsigned(1)", "start_array()", + "string(string)", "number_float(43.12)", "end_array()", "null()", + "start_object()", "key(key1)" + })); + + json::sax_parse(s, &exit_after_start_array); + CHECK(exit_after_start_array.events.size() == 1); + CHECK(exit_after_start_array.events == std::vector( + { + "start_array()" + })); + } } From 8d6b3d44d6255a540c620ad4c683b4a669b6acdc Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 25 Feb 2018 18:35:16 +0100 Subject: [PATCH 05/43] :ok_hand: fixed some compiler warnings --- include/nlohmann/detail/input/parser.hpp | 8 +++++--- single_include/nlohmann/json.hpp | 8 +++++--- test/src/unit-class_parser.cpp | 10 +++++----- test/src/unit-deserialization.cpp | 14 +++++++------- 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index 1cd6868f..67bdcd49 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -96,7 +96,9 @@ class parser /// a parse error occurred /// the byte position and the last token are reported - 
virtual bool parse_error(int position, const std::string& last_token) = 0; + virtual bool parse_error(std::size_t position, const std::string& last_token) = 0; + + virtual ~SAX() = default; }; using parser_callback_t = @@ -585,7 +587,7 @@ class parser { case token_type::begin_object: { - if (not sax->start_object(-1)) + if (not sax->start_object(std::size_t(-1))) { return false; } @@ -654,7 +656,7 @@ class parser case token_type::begin_array: { - if (not sax->start_array(-1)) + if (not sax->start_array(std::size_t(-1))) { return false; } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 79581142..9c04cd9b 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3219,7 +3219,9 @@ class parser /// a parse error occurred /// the byte position and the last token are reported - virtual bool parse_error(int position, const std::string& last_token) = 0; + virtual bool parse_error(std::size_t position, const std::string& last_token) = 0; + + virtual ~SAX() = default; }; using parser_callback_t = @@ -3708,7 +3710,7 @@ class parser { case token_type::begin_object: { - if (not sax->start_object(-1)) + if (not sax->start_object(std::size_t(-1))) { return false; } @@ -3777,7 +3779,7 @@ class parser case token_type::begin_array: { - if (not sax->start_array(-1)) + if (not sax->start_array(std::size_t(-1))) { return false; } diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index 27c1ee8d..94acaaa5 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -61,7 +61,7 @@ class SaxEventLogger : public nlohmann::json::SAX return true; } - bool number_float(json::number_float_t val, const std::string& s) override + bool number_float(json::number_float_t, const std::string& s) override { events.push_back("number_float(" + s + ")"); return true; @@ -75,7 +75,7 @@ class SaxEventLogger : public nlohmann::json::SAX bool start_object(std::size_t elements) override { - 
if (elements == -1) + if (elements == std::size_t(-1)) { events.push_back("start_object()"); } @@ -100,7 +100,7 @@ class SaxEventLogger : public nlohmann::json::SAX bool start_array(std::size_t elements) override { - if (elements == -1) + if (elements == std::size_t(-1)) { events.push_back("start_array()"); } @@ -117,13 +117,13 @@ class SaxEventLogger : public nlohmann::json::SAX return true; } - bool binary(const std::vector& vec) override + bool binary(const std::vector&) override { events.push_back("binary()"); return true; } - bool parse_error(int position, const std::string& last_token) override + bool parse_error(std::size_t position, const std::string&) override { errored = true; events.push_back("parse_error(" + std::to_string(position) + ")"); diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index 9f0bc175..603773b9 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -60,7 +60,7 @@ struct SaxEventLogger : public nlohmann::json::SAX return true; } - bool number_float(json::number_float_t val, const std::string& s) override + bool number_float(json::number_float_t, const std::string& s) override { events.push_back("number_float(" + s + ")"); return true; @@ -74,7 +74,7 @@ struct SaxEventLogger : public nlohmann::json::SAX bool start_object(std::size_t elements) override { - if (elements == -1) + if (elements == std::size_t(-1)) { events.push_back("start_object()"); } @@ -99,7 +99,7 @@ struct SaxEventLogger : public nlohmann::json::SAX bool start_array(std::size_t elements) override { - if (elements == -1) + if (elements == std::size_t(-1)) { events.push_back("start_array()"); } @@ -116,13 +116,13 @@ struct SaxEventLogger : public nlohmann::json::SAX return true; } - bool binary(const std::vector& vec) override + bool binary(const std::vector&) override { events.push_back("binary()"); return true; } - bool parse_error(int position, const std::string& last_token) override + bool 
parse_error(std::size_t position, const std::string&) override { events.push_back("parse_error(" + std::to_string(position) + ")"); return false; @@ -135,7 +135,7 @@ struct SaxEventLoggerExitAfterStartObject : public SaxEventLogger { bool start_object(std::size_t elements) override { - if (elements == -1) + if (elements == std::size_t(-1)) { events.push_back("start_object()"); } @@ -160,7 +160,7 @@ struct SaxEventLoggerExitAfterStartArray : public SaxEventLogger { bool start_array(std::size_t elements) override { - if (elements == -1) + if (elements == std::size_t(-1)) { events.push_back("start_array()"); } From 21352c4d8ed7b0d5fef6138f5f665285f112567b Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 26 Feb 2018 20:08:12 +0100 Subject: [PATCH 06/43] :recycle: refactored SAX parser --- Makefile | 1 + include/nlohmann/detail/input/json_sax.hpp | 120 ++++++++++++++ include/nlohmann/detail/input/parser.hpp | 54 +----- include/nlohmann/json.hpp | 8 +- single_include/nlohmann/json.hpp | 183 +++++++++++++++------ test/src/unit-class_parser.cpp | 2 +- test/src/unit-deserialization.cpp | 2 +- 7 files changed, 260 insertions(+), 110 deletions(-) create mode 100644 include/nlohmann/detail/input/json_sax.hpp diff --git a/Makefile b/Makefile index 63c9cd78..ee9a64f1 100644 --- a/Makefile +++ b/Makefile @@ -9,6 +9,7 @@ SRCS = include/nlohmann/json.hpp \ include/nlohmann/detail/exceptions.hpp \ include/nlohmann/detail/input/binary_reader.hpp \ include/nlohmann/detail/input/input_adapters.hpp \ + include/nlohmann/detail/input/json_sax.hpp \ include/nlohmann/detail/input/lexer.hpp \ include/nlohmann/detail/input/parser.hpp \ include/nlohmann/detail/iterators/internal_iterator.hpp \ diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp new file mode 100644 index 00000000..3aad6648 --- /dev/null +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -0,0 +1,120 @@ +#pragma once + +#include +#include +#include + +namespace nlohmann +{ 
+ +/*! +@brief SAX interface +*/ +template +struct json_sax +{ + /// type for (signed) integers + using number_integer_t = typename BasicJsonType::number_integer_t; + /// type for unsigned integers + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + /// type for floating-point numbers + using number_float_t = typename BasicJsonType::number_float_t; + + /*! + @brief a null value was read + @return whether parsing should proceed + */ + virtual bool null() = 0; + + /*! + @brief a boolean value was read + @param[in] val boolean value + @return whether parsing should proceed + */ + virtual bool boolean(bool val) = 0; + + /*! + @brief an integer number was read + @param[in] val integer value + @return whether parsing should proceed + */ + virtual bool number_integer(number_integer_t val) = 0; + + /*! + @brief an unsigned integer number was read + @param[in] val unsigned integer value + @return whether parsing should proceed + */ + virtual bool number_unsigned(number_unsigned_t val) = 0; + + /*! + @brief an floating-point number was read + @param[in] val floating-point value + @param[in] s raw token value + @return whether parsing should proceed + */ + virtual bool number_float(number_float_t val, const std::string& s) = 0; + + /*! + @brief a string was read + @param[in] val string value + @return whether parsing should proceed + */ + virtual bool string(const std::string& val) = 0; + + /*! + @brief the beginning of an object was read + @param[in] elements number of object elements or -1 if unknown + @return whether parsing should proceed + @note binary formats may report the number of elements + */ + virtual bool start_object(std::size_t elements) = 0; + + /*! + @brief an object key was read + @param[in] val object key + @return whether parsing should proceed + */ + virtual bool key(const std::string& val) = 0; + + /*! + @brief the end of an object was read + @return whether parsing should proceed + */ + virtual bool end_object() = 0; + + /*! 
+ @brief the beginning of an array was read + @param[in] elements number of array elements or -1 if unknown + @return whether parsing should proceed + @note binary formats may report the number of elements + */ + virtual bool start_array(std::size_t elements) = 0; + + /*! + @brief the end of an array was read + @return whether parsing should proceed + */ + virtual bool end_array() = 0; + + /*! + @brief a binary value was read + @param[in] val byte vector + @return whether parsing should proceed + @note examples are CBOR type 2 strings, MessagePack bin, and maybe UBJSON + array + */ + virtual bool binary(const std::vector& val) = 0; + + /*! + @brief a parse error occurred + @param[in] position the position in the input where the error occurs + @param[in] last_token the last read token + @return whether parsing should proceed + */ + virtual bool parse_error(std::size_t position, const std::string& last_token) = 0; + + virtual ~json_sax() = default; +}; +} + diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index 67bdcd49..aa84a2be 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -52,54 +53,7 @@ class parser value }; - struct SAX - { - /// a null value was read - virtual bool null() = 0; - - /// a boolean value was read - virtual bool boolean(bool) = 0; - - /// an integer number was read - virtual bool number_integer(number_integer_t) = 0; - - /// an unsigned integer number was read - virtual bool number_unsigned(number_unsigned_t) = 0; - - /// a floating-point number was read - /// the string parameter contains the raw number value - virtual bool number_float(number_float_t, const std::string&) = 0; - - /// a string value was read - virtual bool string(const std::string&) = 0; - - /// the beginning of an object was read - /// binary formats may report the number of elements - virtual bool 
start_object(std::size_t elements) = 0; - - /// an object key was read - virtual bool key(const std::string&) = 0; - - /// the end of an object was read - virtual bool end_object() = 0; - - /// the beginning of an array was read - /// binary formats may report the number of elements - virtual bool start_array(std::size_t elements) = 0; - - /// the end of an array was read - virtual bool end_array() = 0; - - /// a binary value was read - /// examples are CBOR type 2 strings, MessagePack bin, and maybe UBJSON array - virtual bool binary(const std::vector& vec) = 0; - - /// a parse error occurred - /// the byte position and the last token are reported - virtual bool parse_error(std::size_t position, const std::string& last_token) = 0; - - virtual ~SAX() = default; - }; + using json_sax = json_sax; using parser_callback_t = std::function; @@ -111,7 +65,7 @@ class parser : callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_) {} - parser(detail::input_adapter_t adapter, SAX* s) + parser(detail::input_adapter_t adapter, json_sax* s) : m_lexer(adapter), sax(s) {} @@ -818,7 +772,7 @@ class parser /// whether to throw exceptions in case of errors const bool allow_exceptions = true; /// associated SAX parse event receiver - SAX* sax = nullptr; + json_sax* sax = nullptr; }; } } diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index 9515d29e..8f5aee0c 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -1105,7 +1105,7 @@ class basic_json */ using parser_callback_t = typename parser::parser_callback_t; - using SAX = typename parser::SAX; + using json_sax = typename parser::json_sax; ////////////////// // constructors // @@ -5926,12 +5926,12 @@ class basic_json return parser(i).accept(true); } - static bool sax_parse(detail::input_adapter i, SAX* sax) + static bool sax_parse(detail::input_adapter i, json_sax* sax) { return parser(i, sax).sax_parse(); } - static bool sax_parse(detail::input_adapter& i, SAX* sax) + static bool 
sax_parse(detail::input_adapter& i, json_sax* sax) { return parser(i, sax).sax_parse(); } @@ -6009,7 +6009,7 @@ class basic_json std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits::iterator_category>::value, int>::type = 0> - static bool sax_parse(IteratorType first, IteratorType last, SAX* sax) + static bool sax_parse(IteratorType first, IteratorType last, json_sax* sax) { return parser(detail::input_adapter(first, last), sax).sax_parse(); } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 9c04cd9b..01e77f41 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3131,6 +3131,128 @@ scan_number_done: // #include +// #include + + +#include +#include +#include + +namespace nlohmann +{ + +/*! +@brief SAX interface +*/ +template +struct json_sax +{ + /// type for (signed) integers + using number_integer_t = typename BasicJsonType::number_integer_t; + /// type for unsigned integers + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + /// type for floating-point numbers + using number_float_t = typename BasicJsonType::number_float_t; + + /*! + @brief a null value was read + @return whether parsing should proceed + */ + virtual bool null() = 0; + + /*! + @brief a boolean value was read + @param[in] val boolean value + @return whether parsing should proceed + */ + virtual bool boolean(bool val) = 0; + + /*! + @brief an integer number was read + @param[in] val integer value + @return whether parsing should proceed + */ + virtual bool number_integer(number_integer_t val) = 0; + + /*! + @brief an unsigned integer number was read + @param[in] val unsigned integer value + @return whether parsing should proceed + */ + virtual bool number_unsigned(number_unsigned_t val) = 0; + + /*! 
+ @brief an floating-point number was read + @param[in] val floating-point value + @param[in] s raw token value + @return whether parsing should proceed + */ + virtual bool number_float(number_float_t val, const std::string& s) = 0; + + /*! + @brief a string was read + @param[in] val string value + @return whether parsing should proceed + */ + virtual bool string(const std::string& val) = 0; + + /*! + @brief the beginning of an object was read + @param[in] elements number of object elements or -1 if unknown + @return whether parsing should proceed + @note binary formats may report the number of elements + */ + virtual bool start_object(std::size_t elements) = 0; + + /*! + @brief an object key was read + @param[in] val object key + @return whether parsing should proceed + */ + virtual bool key(const std::string& val) = 0; + + /*! + @brief the end of an object was read + @return whether parsing should proceed + */ + virtual bool end_object() = 0; + + /*! + @brief the beginning of an array was read + @param[in] elements number of array elements or -1 if unknown + @return whether parsing should proceed + @note binary formats may report the number of elements + */ + virtual bool start_array(std::size_t elements) = 0; + + /*! + @brief the end of an array was read + @return whether parsing should proceed + */ + virtual bool end_array() = 0; + + /*! + @brief a binary value was read + @param[in] val byte vector + @return whether parsing should proceed + @note examples are CBOR type 2 strings, MessagePack bin, and maybe UBJSON + array + */ + virtual bool binary(const std::vector& val) = 0; + + /*! 
+ @brief a parse error occurred + @param[in] position the position in the input where the error occurs + @param[in] last_token the last read token + @return whether parsing should proceed + */ + virtual bool parse_error(std::size_t position, const std::string& last_token) = 0; + + virtual ~json_sax() = default; +}; +} + + // #include // #include @@ -3175,54 +3297,7 @@ class parser value }; - struct SAX - { - /// a null value was read - virtual bool null() = 0; - - /// a boolean value was read - virtual bool boolean(bool) = 0; - - /// an integer number was read - virtual bool number_integer(number_integer_t) = 0; - - /// an unsigned integer number was read - virtual bool number_unsigned(number_unsigned_t) = 0; - - /// a floating-point number was read - /// the string parameter contains the raw number value - virtual bool number_float(number_float_t, const std::string&) = 0; - - /// a string value was read - virtual bool string(const std::string&) = 0; - - /// the beginning of an object was read - /// binary formats may report the number of elements - virtual bool start_object(std::size_t elements) = 0; - - /// an object key was read - virtual bool key(const std::string&) = 0; - - /// the end of an object was read - virtual bool end_object() = 0; - - /// the beginning of an array was read - /// binary formats may report the number of elements - virtual bool start_array(std::size_t elements) = 0; - - /// the end of an array was read - virtual bool end_array() = 0; - - /// a binary value was read - /// examples are CBOR type 2 strings, MessagePack bin, and maybe UBJSON array - virtual bool binary(const std::vector& vec) = 0; - - /// a parse error occurred - /// the byte position and the last token are reported - virtual bool parse_error(std::size_t position, const std::string& last_token) = 0; - - virtual ~SAX() = default; - }; + using json_sax = json_sax; using parser_callback_t = std::function; @@ -3234,7 +3309,7 @@ class parser : callback(cb), m_lexer(adapter), 
allow_exceptions(allow_exceptions_) {} - parser(detail::input_adapter_t adapter, SAX* s) + parser(detail::input_adapter_t adapter, json_sax* s) : m_lexer(adapter), sax(s) {} @@ -3941,7 +4016,7 @@ class parser /// whether to throw exceptions in case of errors const bool allow_exceptions = true; /// associated SAX parse event receiver - SAX* sax = nullptr; + json_sax* sax = nullptr; }; } } @@ -10938,7 +11013,7 @@ class basic_json */ using parser_callback_t = typename parser::parser_callback_t; - using SAX = typename parser::SAX; + using json_sax = typename parser::json_sax; ////////////////// // constructors // @@ -15759,12 +15834,12 @@ class basic_json return parser(i).accept(true); } - static bool sax_parse(detail::input_adapter i, SAX* sax) + static bool sax_parse(detail::input_adapter i, json_sax* sax) { return parser(i, sax).sax_parse(); } - static bool sax_parse(detail::input_adapter& i, SAX* sax) + static bool sax_parse(detail::input_adapter& i, json_sax* sax) { return parser(i, sax).sax_parse(); } @@ -15842,7 +15917,7 @@ class basic_json std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits::iterator_category>::value, int>::type = 0> - static bool sax_parse(IteratorType first, IteratorType last, SAX* sax) + static bool sax_parse(IteratorType first, IteratorType last, json_sax* sax) { return parser(detail::input_adapter(first, last), sax).sax_parse(); } diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index 94acaaa5..cfa90f26 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -34,7 +34,7 @@ using nlohmann::json; #include -class SaxEventLogger : public nlohmann::json::SAX +class SaxEventLogger : public nlohmann::json::json_sax { public: bool null() override diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index 603773b9..e48d6348 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -34,7 +34,7 @@ using 
nlohmann::json; #include #include -struct SaxEventLogger : public nlohmann::json::SAX +struct SaxEventLogger : public nlohmann::json::json_sax { bool null() override { From 3ff94553325b98ab03c70deac689f71b4eb790d8 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 26 Feb 2018 23:39:23 +0100 Subject: [PATCH 07/43] :hammer: added a SAX-DOM-Parser --- include/nlohmann/detail/input/json_sax.hpp | 15 ++- include/nlohmann/detail/input/lexer.hpp | 2 +- include/nlohmann/detail/input/parser.hpp | 10 +- include/nlohmann/json.hpp | 8 +- single_include/nlohmann/json.hpp | 35 +++--- test/src/unit-class_parser.cpp | 139 ++++++++++++++++++++- test/src/unit-deserialization.cpp | 12 +- 7 files changed, 177 insertions(+), 44 deletions(-) diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index 3aad6648..87f2119f 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -20,6 +20,9 @@ struct json_sax /// type for floating-point numbers using number_float_t = typename BasicJsonType::number_float_t; + /// constant to indicate that no size limit is given for array or object + static constexpr auto no_limit = std::size_t(-1); + /*! @brief a null value was read @return whether parsing should proceed @@ -60,22 +63,22 @@ struct json_sax @param[in] val string value @return whether parsing should proceed */ - virtual bool string(const std::string& val) = 0; + virtual bool string(std::string&& val) = 0; /*! @brief the beginning of an object was read - @param[in] elements number of object elements or -1 if unknown + @param[in] elements number of object elements or no_limit if unknown @return whether parsing should proceed @note binary formats may report the number of elements */ - virtual bool start_object(std::size_t elements) = 0; + virtual bool start_object(std::size_t elements = no_limit) = 0; /*! 
@brief an object key was read @param[in] val object key @return whether parsing should proceed */ - virtual bool key(const std::string& val) = 0; + virtual bool key(std::string&& val) = 0; /*! @brief the end of an object was read @@ -85,11 +88,11 @@ struct json_sax /*! @brief the beginning of an array was read - @param[in] elements number of array elements or -1 if unknown + @param[in] elements number of array elements or no_limit if unknown @return whether parsing should proceed @note binary formats may report the number of elements */ - virtual bool start_array(std::size_t elements) = 0; + virtual bool start_array(std::size_t elements = no_limit) = 0; /*! @brief the end of an array was read diff --git a/include/nlohmann/detail/input/lexer.hpp b/include/nlohmann/detail/input/lexer.hpp index 75001652..ea116093 100644 --- a/include/nlohmann/detail/input/lexer.hpp +++ b/include/nlohmann/detail/input/lexer.hpp @@ -1130,7 +1130,7 @@ scan_number_done: } /// return current string value (implicitly resets the token; useful only once) - std::string move_string() + std::string&& move_string() { return std::move(token_buffer); } diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index aa84a2be..8fc29981 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -53,7 +53,7 @@ class parser value }; - using json_sax = json_sax; + using json_sax_t = json_sax; using parser_callback_t = std::function; @@ -65,7 +65,7 @@ class parser : callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_) {} - parser(detail::input_adapter_t adapter, json_sax* s) + parser(detail::input_adapter_t adapter, json_sax_t* s) : m_lexer(adapter), sax(s) {} @@ -541,7 +541,7 @@ class parser { case token_type::begin_object: { - if (not sax->start_object(std::size_t(-1))) + if (not sax->start_object()) { return false; } @@ -610,7 +610,7 @@ class parser case token_type::begin_array: { - if (not 
sax->start_array(std::size_t(-1))) + if (not sax->start_array()) { return false; } @@ -772,7 +772,7 @@ class parser /// whether to throw exceptions in case of errors const bool allow_exceptions = true; /// associated SAX parse event receiver - json_sax* sax = nullptr; + json_sax_t* sax = nullptr; }; } } diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index 8f5aee0c..f1b7e5dd 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -1105,7 +1105,7 @@ class basic_json */ using parser_callback_t = typename parser::parser_callback_t; - using json_sax = typename parser::json_sax; + using json_sax_t = typename parser::json_sax_t; ////////////////// // constructors // @@ -5926,12 +5926,12 @@ class basic_json return parser(i).accept(true); } - static bool sax_parse(detail::input_adapter i, json_sax* sax) + static bool sax_parse(detail::input_adapter i, json_sax_t* sax) { return parser(i, sax).sax_parse(); } - static bool sax_parse(detail::input_adapter& i, json_sax* sax) + static bool sax_parse(detail::input_adapter& i, json_sax_t* sax) { return parser(i, sax).sax_parse(); } @@ -6009,7 +6009,7 @@ class basic_json std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits::iterator_category>::value, int>::type = 0> - static bool sax_parse(IteratorType first, IteratorType last, json_sax* sax) + static bool sax_parse(IteratorType first, IteratorType last, json_sax_t* sax) { return parser(detail::input_adapter(first, last), sax).sax_parse(); } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 01e77f41..92123591 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -2969,7 +2969,7 @@ scan_number_done: } /// return current string value (implicitly resets the token; useful only once) - std::string move_string() + std::string&& move_string() { return std::move(token_buffer); } @@ -3154,6 +3154,9 @@ struct json_sax /// type for floating-point numbers using 
number_float_t = typename BasicJsonType::number_float_t; + /// constant to indicate that no size limit is given for array or object + static constexpr auto no_limit = std::size_t(-1); + /*! @brief a null value was read @return whether parsing should proceed @@ -3194,22 +3197,22 @@ struct json_sax @param[in] val string value @return whether parsing should proceed */ - virtual bool string(const std::string& val) = 0; + virtual bool string(std::string&& val) = 0; /*! @brief the beginning of an object was read - @param[in] elements number of object elements or -1 if unknown + @param[in] elements number of object elements or no_limit if unknown @return whether parsing should proceed @note binary formats may report the number of elements */ - virtual bool start_object(std::size_t elements) = 0; + virtual bool start_object(std::size_t elements = no_limit) = 0; /*! @brief an object key was read @param[in] val object key @return whether parsing should proceed */ - virtual bool key(const std::string& val) = 0; + virtual bool key(std::string&& val) = 0; /*! @brief the end of an object was read @@ -3219,11 +3222,11 @@ struct json_sax /*! @brief the beginning of an array was read - @param[in] elements number of array elements or -1 if unknown + @param[in] elements number of array elements or no_limit if unknown @return whether parsing should proceed @note binary formats may report the number of elements */ - virtual bool start_array(std::size_t elements) = 0; + virtual bool start_array(std::size_t elements = no_limit) = 0; /*! 
@brief the end of an array was read @@ -3297,7 +3300,7 @@ class parser value }; - using json_sax = json_sax; + using json_sax_t = json_sax; using parser_callback_t = std::function; @@ -3309,7 +3312,7 @@ class parser : callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_) {} - parser(detail::input_adapter_t adapter, json_sax* s) + parser(detail::input_adapter_t adapter, json_sax_t* s) : m_lexer(adapter), sax(s) {} @@ -3785,7 +3788,7 @@ class parser { case token_type::begin_object: { - if (not sax->start_object(std::size_t(-1))) + if (not sax->start_object()) { return false; } @@ -3854,7 +3857,7 @@ class parser case token_type::begin_array: { - if (not sax->start_array(std::size_t(-1))) + if (not sax->start_array()) { return false; } @@ -4016,7 +4019,7 @@ class parser /// whether to throw exceptions in case of errors const bool allow_exceptions = true; /// associated SAX parse event receiver - json_sax* sax = nullptr; + json_sax_t* sax = nullptr; }; } } @@ -11013,7 +11016,7 @@ class basic_json */ using parser_callback_t = typename parser::parser_callback_t; - using json_sax = typename parser::json_sax; + using json_sax_t = typename parser::json_sax_t; ////////////////// // constructors // @@ -15834,12 +15837,12 @@ class basic_json return parser(i).accept(true); } - static bool sax_parse(detail::input_adapter i, json_sax* sax) + static bool sax_parse(detail::input_adapter i, json_sax_t* sax) { return parser(i, sax).sax_parse(); } - static bool sax_parse(detail::input_adapter& i, json_sax* sax) + static bool sax_parse(detail::input_adapter& i, json_sax_t* sax) { return parser(i, sax).sax_parse(); } @@ -15917,7 +15920,7 @@ class basic_json std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits::iterator_category>::value, int>::type = 0> - static bool sax_parse(IteratorType first, IteratorType last, json_sax* sax) + static bool sax_parse(IteratorType first, IteratorType last, json_sax_t* sax) { return 
parser(detail::input_adapter(first, last), sax).sax_parse(); } diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index cfa90f26..bbf10589 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -34,7 +34,7 @@ using nlohmann::json; #include -class SaxEventLogger : public nlohmann::json::json_sax +class SaxEventLogger : public nlohmann::json::json_sax_t { public: bool null() override @@ -67,7 +67,7 @@ class SaxEventLogger : public nlohmann::json::json_sax return true; } - bool string(const std::string& val) override + bool string(std::string&& val) override { events.push_back("string(" + val + ")"); return true; @@ -75,7 +75,7 @@ class SaxEventLogger : public nlohmann::json::json_sax bool start_object(std::size_t elements) override { - if (elements == std::size_t(-1)) + if (elements == no_limit) { events.push_back("start_object()"); } @@ -86,13 +86,13 @@ class SaxEventLogger : public nlohmann::json::json_sax return true; } - bool key(const std::string& val) override + bool key(std::string&& val) override { events.push_back("key(" + val + ")"); return true; } - bool end_object()override + bool end_object() override { events.push_back("end_object()"); return true; @@ -100,7 +100,7 @@ class SaxEventLogger : public nlohmann::json::json_sax bool start_array(std::size_t elements) override { - if (elements == std::size_t(-1)) + if (elements == no_limit) { events.push_back("start_array()"); } @@ -134,6 +134,129 @@ class SaxEventLogger : public nlohmann::json::json_sax bool errored = false; }; +class SaxDomParser : public nlohmann::json::json_sax_t +{ + public: + bool null() override + { + handle_value(nullptr); + return true; + } + + bool boolean(bool val) override + { + handle_value(val); + return true; + } + + bool number_integer(json::number_integer_t val) override + { + handle_value(val); + return true; + } + + bool number_unsigned(json::number_unsigned_t val) override + { + handle_value(val); + return true; + } + + 
bool number_float(json::number_float_t val, const std::string&) override + { + handle_value(val); + return true; + } + + bool string(std::string&& val) override + { + handle_value(val); + return true; + } + + bool start_object(std::size_t) override + { + ref_stack.push_back(handle_value(json::value_t::object)); + return true; + } + + bool key(std::string&& val) override + { + last_key = val; + return true; + } + + bool end_object() override + { + ref_stack.pop_back(); + return true; + } + + bool start_array(std::size_t) override + { + ref_stack.push_back(handle_value(json::value_t::array)); + return true; + } + + bool end_array() override + { + ref_stack.pop_back(); + return true; + } + + bool binary(const std::vector&) override + { + return true; + } + + bool parse_error(std::size_t position, const std::string&) override + { + return false; + } + + json& get_value() + { + return root; + } + + private: + /// the parsed JSON value + json root; + /// stack to model hierarchy of values + std::vector ref_stack; + /// helper variable for object keys + std::string last_key; + + /*! + @invariant If the ref stack is empty, then the passed value will be the new + root. 
+ @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + */ + json* handle_value(json&& j) + { + if (ref_stack.empty()) + { + assert(root.is_null()); + root = j; + return &root; + } + else + { + assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); + if (ref_stack.back()->is_array()) + { + ref_stack.back()->push_back(j); + return &(ref_stack.back()->back()); + } + else + { + json& r = ref_stack.back()->operator[](last_key) = j; + return &r; + } + } + } +}; + json parser_helper(const std::string& s); bool accept_helper(const std::string& s); @@ -148,6 +271,10 @@ json parser_helper(const std::string& s) CHECK_NOTHROW(json::parser(nlohmann::detail::input_adapter(s), nullptr, false).parse(true, j_nothrow)); CHECK(j_nothrow == j); + SaxDomParser sdp; + json::sax_parse(s, &sdp); + CHECK(sdp.get_value() == j); + return j; } diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index e48d6348..d50e6924 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -34,7 +34,7 @@ using nlohmann::json; #include #include -struct SaxEventLogger : public nlohmann::json::json_sax +struct SaxEventLogger : public nlohmann::json::json_sax_t { bool null() override { @@ -66,7 +66,7 @@ struct SaxEventLogger : public nlohmann::json::json_sax return true; } - bool string(const std::string& val) override + bool string(std::string&& val) override { events.push_back("string(" + val + ")"); return true; @@ -85,7 +85,7 @@ struct SaxEventLogger : public nlohmann::json::json_sax return true; } - bool key(const std::string& val) override + bool key(std::string&& val) override { events.push_back("key(" + val + ")"); return true; @@ -135,7 +135,7 @@ struct SaxEventLoggerExitAfterStartObject : public SaxEventLogger { bool start_object(std::size_t elements) override { - if (elements == std::size_t(-1)) + if (elements == no_limit) { events.push_back("start_object()"); } 
@@ -149,7 +149,7 @@ struct SaxEventLoggerExitAfterStartObject : public SaxEventLogger struct SaxEventLoggerExitAfterKey : public SaxEventLogger { - bool key(const std::string& val) override + bool key(std::string&& val) override { events.push_back("key(" + val + ")"); return false; @@ -160,7 +160,7 @@ struct SaxEventLoggerExitAfterStartArray : public SaxEventLogger { bool start_array(std::size_t elements) override { - if (elements == std::size_t(-1)) + if (elements == no_limit) { events.push_back("start_array()"); } From 9d27429527bd00c1fe90b1e280e0ba512e96425d Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 5 Mar 2018 16:46:35 +0100 Subject: [PATCH 08/43] :hammer: added error messages to SAX interface --- include/nlohmann/detail/input/json_sax.hpp | 5 ++- include/nlohmann/detail/input/parser.hpp | 35 ++++++++++++------- single_include/nlohmann/json.hpp | 40 +++++++++++++++------- test/src/unit-class_parser.cpp | 4 +-- test/src/unit-deserialization.cpp | 2 +- 5 files changed, 57 insertions(+), 29 deletions(-) diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index 87f2119f..af2e7f24 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -113,9 +113,12 @@ struct json_sax @brief a parse error occurred @param[in] position the position in the input where the error occurs @param[in] last_token the last read token + @param[in] error_msg a detailed error message @return whether parsing should proceed */ - virtual bool parse_error(std::size_t position, const std::string& last_token) = 0; + virtual bool parse_error(std::size_t position, + const std::string& last_token, + const std::string& error_msg) = 0; virtual ~json_sax() = default; }; diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index 8fc29981..ece475bb 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ 
-562,7 +562,8 @@ class parser if (JSON_UNLIKELY(last_token != token_type::value_string)) { return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string()); + m_lexer.get_token_string(), + exception_message(token_type::value_string)); } else { @@ -577,7 +578,8 @@ class parser if (JSON_UNLIKELY(last_token != token_type::name_separator)) { return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string()); + m_lexer.get_token_string(), + exception_message(token_type::name_separator)); } // parse value @@ -603,7 +605,8 @@ class parser else { return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string()); + m_lexer.get_token_string(), + exception_message(token_type::end_object)); } } } @@ -649,7 +652,8 @@ class parser else { return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string()); + m_lexer.get_token_string(), + exception_message(token_type::end_array)); } } } @@ -661,7 +665,8 @@ class parser if (JSON_UNLIKELY(not std::isfinite(res))) { return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string()); + m_lexer.get_token_string(), + "number overflow"); } else { @@ -699,10 +704,19 @@ class parser return sax->number_unsigned(m_lexer.get_number_unsigned()); } + case token_type::parse_error: + { + // using "uninitialized" to avoid "expected" message + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + exception_message(token_type::uninitialized)); + } + default: // the last token was unexpected { return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string()); + m_lexer.get_token_string(), + exception_message(token_type::literal_or_value)); } } } @@ -721,10 +735,9 @@ class parser if (JSON_UNLIKELY(t != last_token)) { errored = true; - expected = t; if (allow_exceptions) { - throw_exception(); + JSON_THROW(parse_error::create(101, m_lexer.get_position(), exception_message(t))); } else { @@ -735,7 +748,7 @@ class parser return true; } - [[noreturn]] void 
throw_exception() const + std::string exception_message(const token_type expected) { std::string error_msg = "syntax error - "; if (last_token == token_type::parse_error) @@ -753,7 +766,7 @@ class parser error_msg += "; expected " + std::string(lexer_t::token_type_name(expected)); } - JSON_THROW(parse_error::create(101, m_lexer.get_position(), error_msg)); + return error_msg; } private: @@ -767,8 +780,6 @@ class parser lexer_t m_lexer; /// whether a syntax error occurred bool errored = false; - /// possible reason for the syntax error - token_type expected = token_type::uninitialized; /// whether to throw exceptions in case of errors const bool allow_exceptions = true; /// associated SAX parse event receiver diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 656afa10..6138d33a 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3247,9 +3247,12 @@ struct json_sax @brief a parse error occurred @param[in] position the position in the input where the error occurs @param[in] last_token the last read token + @param[in] error_msg a detailed error message @return whether parsing should proceed */ - virtual bool parse_error(std::size_t position, const std::string& last_token) = 0; + virtual bool parse_error(std::size_t position, + const std::string& last_token, + const std::string& error_msg) = 0; virtual ~json_sax() = default; }; @@ -3809,7 +3812,8 @@ class parser if (JSON_UNLIKELY(last_token != token_type::value_string)) { return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string()); + m_lexer.get_token_string(), + exception_message(token_type::value_string)); } else { @@ -3824,7 +3828,8 @@ class parser if (JSON_UNLIKELY(last_token != token_type::name_separator)) { return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string()); + m_lexer.get_token_string(), + exception_message(token_type::name_separator)); } // parse value @@ -3850,7 +3855,8 @@ class parser else { 
return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string()); + m_lexer.get_token_string(), + exception_message(token_type::end_object)); } } } @@ -3896,7 +3902,8 @@ class parser else { return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string()); + m_lexer.get_token_string(), + exception_message(token_type::end_array)); } } } @@ -3908,7 +3915,8 @@ class parser if (JSON_UNLIKELY(not std::isfinite(res))) { return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string()); + m_lexer.get_token_string(), + "number overflow"); } else { @@ -3946,10 +3954,19 @@ class parser return sax->number_unsigned(m_lexer.get_number_unsigned()); } + case token_type::parse_error: + { + // using "uninitialized" to avoid "expected" message + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + exception_message(token_type::uninitialized)); + } + default: // the last token was unexpected { return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string()); + m_lexer.get_token_string(), + exception_message(token_type::literal_or_value)); } } } @@ -3968,10 +3985,9 @@ class parser if (JSON_UNLIKELY(t != last_token)) { errored = true; - expected = t; if (allow_exceptions) { - throw_exception(); + JSON_THROW(parse_error::create(101, m_lexer.get_position(), exception_message(t))); } else { @@ -3982,7 +3998,7 @@ class parser return true; } - [[noreturn]] void throw_exception() const + std::string exception_message(const token_type expected) { std::string error_msg = "syntax error - "; if (last_token == token_type::parse_error) @@ -4000,7 +4016,7 @@ class parser error_msg += "; expected " + std::string(lexer_t::token_type_name(expected)); } - JSON_THROW(parse_error::create(101, m_lexer.get_position(), error_msg)); + return error_msg; } private: @@ -4014,8 +4030,6 @@ class parser lexer_t m_lexer; /// whether a syntax error occurred bool errored = false; - /// possible reason for the syntax error - token_type 
expected = token_type::uninitialized; /// whether to throw exceptions in case of errors const bool allow_exceptions = true; /// associated SAX parse event receiver diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index bbf10589..9d40bfeb 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -123,7 +123,7 @@ class SaxEventLogger : public nlohmann::json::json_sax_t return true; } - bool parse_error(std::size_t position, const std::string&) override + bool parse_error(std::size_t position, const std::string&, const std::string&) override { errored = true; events.push_back("parse_error(" + std::to_string(position) + ")"); @@ -208,7 +208,7 @@ class SaxDomParser : public nlohmann::json::json_sax_t return true; } - bool parse_error(std::size_t position, const std::string&) override + bool parse_error(std::size_t position, const std::string&, const std::string&) override { return false; } diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index d50e6924..cd87dfdb 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -122,7 +122,7 @@ struct SaxEventLogger : public nlohmann::json::json_sax_t return true; } - bool parse_error(std::size_t position, const std::string&) override + bool parse_error(std::size_t position, const std::string&, const std::string&) override { events.push_back("parse_error(" + std::to_string(position) + ")"); return false; From 5b9d03cfdbe44e3352c9b293a86ae71e339977de Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 5 Mar 2018 21:06:00 +0100 Subject: [PATCH 09/43] :hammer: added SAX-DOM-Parser --- benchmarks/src/benchmarks.cpp | 56 +++++++++ include/nlohmann/detail/input/json_sax.hpp | 132 ++++++++++++++++++++ include/nlohmann/json.hpp | 2 + single_include/nlohmann/json.hpp | 134 +++++++++++++++++++++ test/src/unit-class_parser.cpp | 125 +------------------ 5 files changed, 325 insertions(+), 124 deletions(-) diff --git 
a/benchmarks/src/benchmarks.cpp b/benchmarks/src/benchmarks.cpp index bebef603..46135e44 100644 --- a/benchmarks/src/benchmarks.cpp +++ b/benchmarks/src/benchmarks.cpp @@ -37,6 +37,35 @@ BENCHMARK_CAPTURE(ParseFile, signed_ints, "data/numbers/signed_ints.json"); BENCHMARK_CAPTURE(ParseFile, unsigned_ints, "data/numbers/unsigned_ints.json"); +static void ParseFileSax(benchmark::State& state, const char* filename) +{ + while (state.KeepRunning()) + { + state.PauseTiming(); + auto* f = new std::ifstream(filename); + auto* sdp = new nlohmann::json_sax_dom_parser(); + state.ResumeTiming(); + + json::sax_parse(*f, sdp); + + state.PauseTiming(); + delete f; + delete sdp; + state.ResumeTiming(); + } + + std::ifstream file(filename, std::ios::binary | std::ios::ate); + state.SetBytesProcessed(state.iterations() * file.tellg()); +} +BENCHMARK_CAPTURE(ParseFileSax, jeopardy, "data/jeopardy/jeopardy.json"); +BENCHMARK_CAPTURE(ParseFileSax, canada, "data/nativejson-benchmark/canada.json"); +BENCHMARK_CAPTURE(ParseFileSax, citm_catalog, "data/nativejson-benchmark/citm_catalog.json"); +BENCHMARK_CAPTURE(ParseFileSax, twitter, "data/nativejson-benchmark/twitter.json"); +BENCHMARK_CAPTURE(ParseFileSax, floats, "data/numbers/floats.json"); +BENCHMARK_CAPTURE(ParseFileSax, signed_ints, "data/numbers/signed_ints.json"); +BENCHMARK_CAPTURE(ParseFileSax, unsigned_ints, "data/numbers/unsigned_ints.json"); + + ////////////////////////////////////////////////////////////////////////////// // parse JSON from string ////////////////////////////////////////////////////////////////////////////// @@ -69,6 +98,33 @@ BENCHMARK_CAPTURE(ParseString, floats, "data/numbers/floats.json"); BENCHMARK_CAPTURE(ParseString, signed_ints, "data/numbers/signed_ints.json"); BENCHMARK_CAPTURE(ParseString, unsigned_ints, "data/numbers/unsigned_ints.json"); +static void ParseStringSax(benchmark::State& state, const char* filename) +{ + std::ifstream f(filename); + std::string str((std::istreambuf_iterator(f)), 
std::istreambuf_iterator()); + + while (state.KeepRunning()) + { + state.PauseTiming(); + auto* sdp = new nlohmann::json_sax_dom_parser(); + state.ResumeTiming(); + + json::sax_parse(str, sdp); + + state.PauseTiming(); + delete sdp; + state.ResumeTiming(); + } + + state.SetBytesProcessed(state.iterations() * str.size()); +} +BENCHMARK_CAPTURE(ParseStringSax, jeopardy, "data/jeopardy/jeopardy.json"); +BENCHMARK_CAPTURE(ParseStringSax, canada, "data/nativejson-benchmark/canada.json"); +BENCHMARK_CAPTURE(ParseStringSax, citm_catalog, "data/nativejson-benchmark/citm_catalog.json"); +BENCHMARK_CAPTURE(ParseStringSax, twitter, "data/nativejson-benchmark/twitter.json"); +BENCHMARK_CAPTURE(ParseStringSax, floats, "data/numbers/floats.json"); +BENCHMARK_CAPTURE(ParseStringSax, signed_ints, "data/numbers/signed_ints.json"); +BENCHMARK_CAPTURE(ParseStringSax, unsigned_ints, "data/numbers/unsigned_ints.json"); ////////////////////////////////////////////////////////////////////////////// // serialize JSON diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index af2e7f24..38a0a710 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -122,5 +122,137 @@ struct json_sax virtual ~json_sax() = default; }; + + +template +class json_sax_dom_parser : public json_sax +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + + bool null() override + { + handle_value(nullptr); + return true; + } + + bool boolean(bool val) override + { + handle_value(val); + return true; + } + + bool number_integer(number_integer_t val) override + { + handle_value(val); + return true; + } + + bool number_unsigned(number_unsigned_t val) override + { + handle_value(val); + return true; + } + + bool number_float(number_float_t val, const 
std::string&) override + { + handle_value(val); + return true; + } + + bool string(std::string&& val) override + { + handle_value(val); + return true; + } + + bool start_object(std::size_t) override + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); + return true; + } + + bool key(std::string&& val) override + { + last_key = val; + return true; + } + + bool end_object() override + { + ref_stack.pop_back(); + return true; + } + + bool start_array(std::size_t) override + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); + return true; + } + + bool end_array() override + { + ref_stack.pop_back(); + return true; + } + + bool binary(const std::vector&) override + { + return true; + } + + bool parse_error(std::size_t position, const std::string&, const std::string& error_msg) override + { + JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg)); + return false; + } + + BasicJsonType& get_value() + { + return root; + } + + private: + /// the parsed JSON value + BasicJsonType root; + /// stack to model hierarchy of values + std::vector ref_stack; + /// helper variable for object keys + std::string last_key; + + /*! + @invariant If the ref stack is empty, then the passed value will be the new + root. 
+ @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + */ + template + BasicJsonType* handle_value(Value&& v) + { + if (ref_stack.empty()) + { + assert(root.is_null()); + root = BasicJsonType(std::forward(v)); + return &root; + } + else + { + assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_value.array->push_back(BasicJsonType(std::forward(v))); + return &(ref_stack.back()->m_value.array->back()); + } + else + { + BasicJsonType& r = ref_stack.back()->m_value.object->operator[](last_key) = BasicJsonType(std::forward(v)); + return &r; + } + } + } +}; + } diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index 49cce53d..7eb0a352 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -171,6 +171,8 @@ class basic_json friend class ::nlohmann::detail::binary_writer; template friend class ::nlohmann::detail::binary_reader; + template + friend class ::nlohmann::json_sax_dom_parser; /// workaround type for MSVC using basic_json_t = NLOHMANN_BASIC_JSON_TPL; diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 6138d33a..009f1109 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3256,6 +3256,138 @@ struct json_sax virtual ~json_sax() = default; }; + + +template +class json_sax_dom_parser : public json_sax +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + + bool null() override + { + handle_value(nullptr); + return true; + } + + bool boolean(bool val) override + { + handle_value(val); + return true; + } + + bool number_integer(number_integer_t val) override + { + handle_value(val); + return true; + } + + bool number_unsigned(number_unsigned_t val) override + { + 
handle_value(val); + return true; + } + + bool number_float(number_float_t val, const std::string&) override + { + handle_value(val); + return true; + } + + bool string(std::string&& val) override + { + handle_value(val); + return true; + } + + bool start_object(std::size_t) override + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); + return true; + } + + bool key(std::string&& val) override + { + last_key = val; + return true; + } + + bool end_object() override + { + ref_stack.pop_back(); + return true; + } + + bool start_array(std::size_t) override + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); + return true; + } + + bool end_array() override + { + ref_stack.pop_back(); + return true; + } + + bool binary(const std::vector&) override + { + return true; + } + + bool parse_error(std::size_t position, const std::string&, const std::string& error_msg) override + { + JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg)); + return false; + } + + BasicJsonType& get_value() + { + return root; + } + + private: + /// the parsed JSON value + BasicJsonType root; + /// stack to model hierarchy of values + std::vector ref_stack; + /// helper variable for object keys + std::string last_key; + + /*! + @invariant If the ref stack is empty, then the passed value will be the new + root. 
+ @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + */ + template + BasicJsonType* handle_value(Value&& v) + { + if (ref_stack.empty()) + { + assert(root.is_null()); + root = BasicJsonType(std::forward(v)); + return &root; + } + else + { + assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_value.array->push_back(BasicJsonType(std::forward(v))); + return &(ref_stack.back()->m_value.array->back()); + } + else + { + BasicJsonType& r = ref_stack.back()->m_value.object->operator[](last_key) = BasicJsonType(std::forward(v)); + return &r; + } + } + } +}; + } @@ -10096,6 +10228,8 @@ class basic_json friend class ::nlohmann::detail::binary_writer; template friend class ::nlohmann::detail::binary_reader; + template + friend class ::nlohmann::json_sax_dom_parser; /// workaround type for MSVC using basic_json_t = NLOHMANN_BASIC_JSON_TPL; diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index 9d40bfeb..a15ad167 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -134,129 +134,6 @@ class SaxEventLogger : public nlohmann::json::json_sax_t bool errored = false; }; -class SaxDomParser : public nlohmann::json::json_sax_t -{ - public: - bool null() override - { - handle_value(nullptr); - return true; - } - - bool boolean(bool val) override - { - handle_value(val); - return true; - } - - bool number_integer(json::number_integer_t val) override - { - handle_value(val); - return true; - } - - bool number_unsigned(json::number_unsigned_t val) override - { - handle_value(val); - return true; - } - - bool number_float(json::number_float_t val, const std::string&) override - { - handle_value(val); - return true; - } - - bool string(std::string&& val) override - { - handle_value(val); - return true; - } - - bool start_object(std::size_t) override - { - 
ref_stack.push_back(handle_value(json::value_t::object)); - return true; - } - - bool key(std::string&& val) override - { - last_key = val; - return true; - } - - bool end_object() override - { - ref_stack.pop_back(); - return true; - } - - bool start_array(std::size_t) override - { - ref_stack.push_back(handle_value(json::value_t::array)); - return true; - } - - bool end_array() override - { - ref_stack.pop_back(); - return true; - } - - bool binary(const std::vector&) override - { - return true; - } - - bool parse_error(std::size_t position, const std::string&, const std::string&) override - { - return false; - } - - json& get_value() - { - return root; - } - - private: - /// the parsed JSON value - json root; - /// stack to model hierarchy of values - std::vector ref_stack; - /// helper variable for object keys - std::string last_key; - - /*! - @invariant If the ref stack is empty, then the passed value will be the new - root. - @invariant If the ref stack contains a value, then it is an array or an - object to which we can add elements - */ - json* handle_value(json&& j) - { - if (ref_stack.empty()) - { - assert(root.is_null()); - root = j; - return &root; - } - else - { - assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); - if (ref_stack.back()->is_array()) - { - ref_stack.back()->push_back(j); - return &(ref_stack.back()->back()); - } - else - { - json& r = ref_stack.back()->operator[](last_key) = j; - return &r; - } - } - } -}; - json parser_helper(const std::string& s); bool accept_helper(const std::string& s); @@ -271,7 +148,7 @@ json parser_helper(const std::string& s) CHECK_NOTHROW(json::parser(nlohmann::detail::input_adapter(s), nullptr, false).parse(true, j_nothrow)); CHECK(j_nothrow == j); - SaxDomParser sdp; + nlohmann::json_sax_dom_parser sdp; json::sax_parse(s, &sdp); CHECK(sdp.get_value() == j); From faf2546a152ca5848eae309d5d713392a58caf70 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 6 Mar 2018 07:19:05 +0100 
Subject: [PATCH 10/43] :hammer: simplified SAX-DOM parser --- include/nlohmann/detail/input/json_sax.hpp | 12 +++++++----- single_include/nlohmann/json.hpp | 12 +++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index 38a0a710..d0161cd4 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -176,7 +176,8 @@ class json_sax_dom_parser : public json_sax bool key(std::string&& val) override { - last_key = val; + // add null at given key and store the reference for later + object_element = &(ref_stack.back()->m_value.object->operator[](val)); return true; } @@ -219,8 +220,8 @@ class json_sax_dom_parser : public json_sax BasicJsonType root; /// stack to model hierarchy of values std::vector ref_stack; - /// helper variable for object keys - std::string last_key; + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; /*! 
@invariant If the ref stack is empty, then the passed value will be the new @@ -247,8 +248,9 @@ class json_sax_dom_parser : public json_sax } else { - BasicJsonType& r = ref_stack.back()->m_value.object->operator[](last_key) = BasicJsonType(std::forward(v)); - return &r; + assert(object_element); + *object_element = BasicJsonType(std::forward(v)); + return object_element; } } } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 009f1109..3b1f7e43 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3310,7 +3310,8 @@ class json_sax_dom_parser : public json_sax bool key(std::string&& val) override { - last_key = val; + // add null at given key and store the reference for later + object_element = &(ref_stack.back()->m_value.object->operator[](val)); return true; } @@ -3353,8 +3354,8 @@ class json_sax_dom_parser : public json_sax BasicJsonType root; /// stack to model hierarchy of values std::vector ref_stack; - /// helper variable for object keys - std::string last_key; + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; /*! 
@invariant If the ref stack is empty, then the passed value will be the new @@ -3381,8 +3382,9 @@ class json_sax_dom_parser : public json_sax } else { - BasicJsonType& r = ref_stack.back()->m_value.object->operator[](last_key) = BasicJsonType(std::forward(v)); - return &r; + assert(object_element); + *object_element = BasicJsonType(std::forward(v)); + return object_element; } } } From 5beab80553cffd61f546f229de24718f8147d006 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 6 Mar 2018 18:17:07 +0100 Subject: [PATCH 11/43] :hammer: using the SAX-DOM parser --- benchmarks/src/benchmarks.cpp | 56 --------- include/nlohmann/detail/input/json_sax.hpp | 43 +++++-- include/nlohmann/detail/input/parser.hpp | 90 +++++++------ include/nlohmann/json.hpp | 6 +- single_include/nlohmann/json.hpp | 139 +++++++++++++-------- test/src/unit-class_parser.cpp | 5 +- 6 files changed, 173 insertions(+), 166 deletions(-) diff --git a/benchmarks/src/benchmarks.cpp b/benchmarks/src/benchmarks.cpp index 46135e44..bebef603 100644 --- a/benchmarks/src/benchmarks.cpp +++ b/benchmarks/src/benchmarks.cpp @@ -37,35 +37,6 @@ BENCHMARK_CAPTURE(ParseFile, signed_ints, "data/numbers/signed_ints.json"); BENCHMARK_CAPTURE(ParseFile, unsigned_ints, "data/numbers/unsigned_ints.json"); -static void ParseFileSax(benchmark::State& state, const char* filename) -{ - while (state.KeepRunning()) - { - state.PauseTiming(); - auto* f = new std::ifstream(filename); - auto* sdp = new nlohmann::json_sax_dom_parser(); - state.ResumeTiming(); - - json::sax_parse(*f, sdp); - - state.PauseTiming(); - delete f; - delete sdp; - state.ResumeTiming(); - } - - std::ifstream file(filename, std::ios::binary | std::ios::ate); - state.SetBytesProcessed(state.iterations() * file.tellg()); -} -BENCHMARK_CAPTURE(ParseFileSax, jeopardy, "data/jeopardy/jeopardy.json"); -BENCHMARK_CAPTURE(ParseFileSax, canada, "data/nativejson-benchmark/canada.json"); -BENCHMARK_CAPTURE(ParseFileSax, citm_catalog, 
"data/nativejson-benchmark/citm_catalog.json"); -BENCHMARK_CAPTURE(ParseFileSax, twitter, "data/nativejson-benchmark/twitter.json"); -BENCHMARK_CAPTURE(ParseFileSax, floats, "data/numbers/floats.json"); -BENCHMARK_CAPTURE(ParseFileSax, signed_ints, "data/numbers/signed_ints.json"); -BENCHMARK_CAPTURE(ParseFileSax, unsigned_ints, "data/numbers/unsigned_ints.json"); - - ////////////////////////////////////////////////////////////////////////////// // parse JSON from string ////////////////////////////////////////////////////////////////////////////// @@ -98,33 +69,6 @@ BENCHMARK_CAPTURE(ParseString, floats, "data/numbers/floats.json"); BENCHMARK_CAPTURE(ParseString, signed_ints, "data/numbers/signed_ints.json"); BENCHMARK_CAPTURE(ParseString, unsigned_ints, "data/numbers/unsigned_ints.json"); -static void ParseStringSax(benchmark::State& state, const char* filename) -{ - std::ifstream f(filename); - std::string str((std::istreambuf_iterator(f)), std::istreambuf_iterator()); - - while (state.KeepRunning()) - { - state.PauseTiming(); - auto* sdp = new nlohmann::json_sax_dom_parser(); - state.ResumeTiming(); - - json::sax_parse(str, sdp); - - state.PauseTiming(); - delete sdp; - state.ResumeTiming(); - } - - state.SetBytesProcessed(state.iterations() * str.size()); -} -BENCHMARK_CAPTURE(ParseStringSax, jeopardy, "data/jeopardy/jeopardy.json"); -BENCHMARK_CAPTURE(ParseStringSax, canada, "data/nativejson-benchmark/canada.json"); -BENCHMARK_CAPTURE(ParseStringSax, citm_catalog, "data/nativejson-benchmark/citm_catalog.json"); -BENCHMARK_CAPTURE(ParseStringSax, twitter, "data/nativejson-benchmark/twitter.json"); -BENCHMARK_CAPTURE(ParseStringSax, floats, "data/numbers/floats.json"); -BENCHMARK_CAPTURE(ParseStringSax, signed_ints, "data/numbers/signed_ints.json"); -BENCHMARK_CAPTURE(ParseStringSax, unsigned_ints, "data/numbers/unsigned_ints.json"); ////////////////////////////////////////////////////////////////////////////// // serialize JSON diff --git 
a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index d0161cd4..06227e70 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -132,6 +132,10 @@ class json_sax_dom_parser : public json_sax using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_float_t = typename BasicJsonType::number_float_t; + json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) + : root(r), allow_exceptions(allow_exceptions_) + {} + bool null() override { handle_value(nullptr); @@ -204,25 +208,30 @@ class json_sax_dom_parser : public json_sax return true; } - bool parse_error(std::size_t position, const std::string&, const std::string& error_msg) override + bool parse_error(std::size_t position, const std::string& token, + const std::string& error_msg) override { - JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg)); + errored = true; + if (allow_exceptions) + { + if (error_msg == "number overflow") + { + JSON_THROW(BasicJsonType::out_of_range::create(406, "number overflow parsing '" + token + "'")); + } + else + { + JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg)); + } + } return false; } - BasicJsonType& get_value() + bool is_errored() const { - return root; + return errored; } private: - /// the parsed JSON value - BasicJsonType root; - /// stack to model hierarchy of values - std::vector ref_stack; - /// helper to hold the reference for the next object element - BasicJsonType* object_element = nullptr; - /*! @invariant If the ref stack is empty, then the passed value will be the new root. 
@@ -234,7 +243,6 @@ class json_sax_dom_parser : public json_sax { if (ref_stack.empty()) { - assert(root.is_null()); root = BasicJsonType(std::forward(v)); return &root; } @@ -254,6 +262,17 @@ class json_sax_dom_parser : public json_sax } } } + + /// the parsed JSON value + BasicJsonType& root; + /// stack to model hierarchy of values + std::vector ref_stack; + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; + /// whether a syntax error occurred + bool errored = false; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; }; } diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index ece475bb..23fac952 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -63,11 +63,10 @@ class parser const parser_callback_t cb = nullptr, const bool allow_exceptions_ = true) : callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_) - {} - - parser(detail::input_adapter_t adapter, json_sax_t* s) - : m_lexer(adapter), sax(s) - {} + { + // read first token + get_token(); + } /*! 
@brief public parser interface @@ -81,31 +80,52 @@ class parser */ void parse(const bool strict, BasicJsonType& result) { - // read first token - get_token(); - - parse_internal(true, result); - result.assert_invariant(); - - // in strict mode, input must be completely read - if (strict) + if (callback) { - get_token(); - expect(token_type::end_of_input); + parse_internal(true, result); + result.assert_invariant(); + + // in strict mode, input must be completely read + if (strict) + { + get_token(); + expect(token_type::end_of_input); + } + + // in case of an error, return discarded value + if (errored) + { + result = value_t::discarded; + return; + } + + // set top-level value to null if it was discarded by the callback + // function + if (result.is_discarded()) + { + result = nullptr; + } } - - // in case of an error, return discarded value - if (errored) + else { - result = value_t::discarded; - return; - } + json_sax_dom_parser sdp(result, allow_exceptions); + sax_parse_internal(&sdp); + result.assert_invariant(); - // set top-level value to null if it was discarded by the callback - // function - if (result.is_discarded()) - { - result = nullptr; + // in strict mode, input must be completely read + if (strict and (get_token() != token_type::end_of_input)) + { + sdp.parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + exception_message(token_type::end_of_input)); + } + + // in case of an error, return discarded value + if (sdp.is_errored()) + { + result = value_t::discarded; + return; + } } } @@ -117,9 +137,6 @@ class parser */ bool accept(const bool strict = true) { - // read first token - get_token(); - if (not accept_internal()) { return false; @@ -129,12 +146,9 @@ class parser return not strict or (get_token() == token_type::end_of_input); } - bool sax_parse() + bool sax_parse(json_sax_t* sax) { - // read first token - get_token(); - - return sax_parse_internal(); + return sax_parse_internal(sax); } private: @@ -535,7 +549,7 @@ class parser } 
} - bool sax_parse_internal() + bool sax_parse_internal(json_sax_t* sax) { switch (last_token) { @@ -584,7 +598,7 @@ class parser // parse value get_token(); - if (not sax_parse_internal()) + if (not sax_parse_internal(sax)) { return false; } @@ -631,7 +645,7 @@ class parser while (true) { // parse value - if (not sax_parse_internal()) + if (not sax_parse_internal(sax)) { return false; } @@ -782,8 +796,6 @@ class parser bool errored = false; /// whether to throw exceptions in case of errors const bool allow_exceptions = true; - /// associated SAX parse event receiver - json_sax_t* sax = nullptr; }; } } diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index 7eb0a352..cef504ff 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -5930,12 +5930,12 @@ class basic_json static bool sax_parse(detail::input_adapter i, json_sax_t* sax) { - return parser(i, sax).sax_parse(); + return parser(i).sax_parse(sax); } static bool sax_parse(detail::input_adapter& i, json_sax_t* sax) { - return parser(i, sax).sax_parse(); + return parser(i).sax_parse(sax); } /*! @@ -6013,7 +6013,7 @@ class basic_json typename std::iterator_traits::iterator_category>::value, int>::type = 0> static bool sax_parse(IteratorType first, IteratorType last, json_sax_t* sax) { - return parser(detail::input_adapter(first, last), sax).sax_parse(); + return parser(detail::input_adapter(first, last)).sax_parse(sax); } /*! 
diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 3b1f7e43..8aaa1f68 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3266,6 +3266,10 @@ class json_sax_dom_parser : public json_sax using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_float_t = typename BasicJsonType::number_float_t; + json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) + : root(r), allow_exceptions(allow_exceptions_) + {} + bool null() override { handle_value(nullptr); @@ -3338,25 +3342,30 @@ class json_sax_dom_parser : public json_sax return true; } - bool parse_error(std::size_t position, const std::string&, const std::string& error_msg) override + bool parse_error(std::size_t position, const std::string& token, + const std::string& error_msg) override { - JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg)); + errored = true; + if (allow_exceptions) + { + if (error_msg == "number overflow") + { + JSON_THROW(BasicJsonType::out_of_range::create(406, "number overflow parsing '" + token + "'")); + } + else + { + JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg)); + } + } return false; } - BasicJsonType& get_value() + bool is_errored() const { - return root; + return errored; } private: - /// the parsed JSON value - BasicJsonType root; - /// stack to model hierarchy of values - std::vector ref_stack; - /// helper to hold the reference for the next object element - BasicJsonType* object_element = nullptr; - /*! @invariant If the ref stack is empty, then the passed value will be the new root. 
@@ -3368,7 +3377,6 @@ class json_sax_dom_parser : public json_sax { if (ref_stack.empty()) { - assert(root.is_null()); root = BasicJsonType(std::forward(v)); return &root; } @@ -3388,6 +3396,17 @@ class json_sax_dom_parser : public json_sax } } } + + /// the parsed JSON value + BasicJsonType& root; + /// stack to model hierarchy of values + std::vector ref_stack; + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; + /// whether a syntax error occurred + bool errored = false; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; }; } @@ -3447,11 +3466,10 @@ class parser const parser_callback_t cb = nullptr, const bool allow_exceptions_ = true) : callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_) - {} - - parser(detail::input_adapter_t adapter, json_sax_t* s) - : m_lexer(adapter), sax(s) - {} + { + // read first token + get_token(); + } /*! @brief public parser interface @@ -3465,31 +3483,52 @@ class parser */ void parse(const bool strict, BasicJsonType& result) { - // read first token - get_token(); - - parse_internal(true, result); - result.assert_invariant(); - - // in strict mode, input must be completely read - if (strict) + if (callback) { - get_token(); - expect(token_type::end_of_input); + parse_internal(true, result); + result.assert_invariant(); + + // in strict mode, input must be completely read + if (strict) + { + get_token(); + expect(token_type::end_of_input); + } + + // in case of an error, return discarded value + if (errored) + { + result = value_t::discarded; + return; + } + + // set top-level value to null if it was discarded by the callback + // function + if (result.is_discarded()) + { + result = nullptr; + } } - - // in case of an error, return discarded value - if (errored) + else { - result = value_t::discarded; - return; - } + json_sax_dom_parser sdp(result, allow_exceptions); + sax_parse_internal(&sdp); + result.assert_invariant(); - 
// set top-level value to null if it was discarded by the callback - // function - if (result.is_discarded()) - { - result = nullptr; + // in strict mode, input must be completely read + if (strict and (get_token() != token_type::end_of_input)) + { + sdp.parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + exception_message(token_type::end_of_input)); + } + + // in case of an error, return discarded value + if (sdp.is_errored()) + { + result = value_t::discarded; + return; + } } } @@ -3501,9 +3540,6 @@ class parser */ bool accept(const bool strict = true) { - // read first token - get_token(); - if (not accept_internal()) { return false; @@ -3513,12 +3549,9 @@ class parser return not strict or (get_token() == token_type::end_of_input); } - bool sax_parse() + bool sax_parse(json_sax_t* sax) { - // read first token - get_token(); - - return sax_parse_internal(); + return sax_parse_internal(sax); } private: @@ -3919,7 +3952,7 @@ class parser } } - bool sax_parse_internal() + bool sax_parse_internal(json_sax_t* sax) { switch (last_token) { @@ -3968,7 +4001,7 @@ class parser // parse value get_token(); - if (not sax_parse_internal()) + if (not sax_parse_internal(sax)) { return false; } @@ -4015,7 +4048,7 @@ class parser while (true) { // parse value - if (not sax_parse_internal()) + if (not sax_parse_internal(sax)) { return false; } @@ -4166,8 +4199,6 @@ class parser bool errored = false; /// whether to throw exceptions in case of errors const bool allow_exceptions = true; - /// associated SAX parse event receiver - json_sax_t* sax = nullptr; }; } } @@ -15989,12 +16020,12 @@ class basic_json static bool sax_parse(detail::input_adapter i, json_sax_t* sax) { - return parser(i, sax).sax_parse(); + return parser(i).sax_parse(sax); } static bool sax_parse(detail::input_adapter& i, json_sax_t* sax) { - return parser(i, sax).sax_parse(); + return parser(i).sax_parse(sax); } /*! 
@@ -16072,7 +16103,7 @@ class basic_json typename std::iterator_traits::iterator_category>::value, int>::type = 0> static bool sax_parse(IteratorType first, IteratorType last, json_sax_t* sax) { - return parser(detail::input_adapter(first, last), sax).sax_parse(); + return parser(detail::input_adapter(first, last)).sax_parse(sax); } /*! diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index a15ad167..39753d68 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -148,9 +148,10 @@ json parser_helper(const std::string& s) CHECK_NOTHROW(json::parser(nlohmann::detail::input_adapter(s), nullptr, false).parse(true, j_nothrow)); CHECK(j_nothrow == j); - nlohmann::json_sax_dom_parser sdp; + json j_sax; + nlohmann::json_sax_dom_parser sdp(j_sax); json::sax_parse(s, &sdp); - CHECK(sdp.get_value() == j); + CHECK(j_sax == j); return j; } From 8b379948d0c77c0e2b3563d5aaf1cc6911fffa25 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 7 Mar 2018 22:40:48 +0100 Subject: [PATCH 12/43] :fire: replaced acceptor with SAX parser --- include/nlohmann/detail/input/json_sax.hpp | 74 ++++++++ include/nlohmann/detail/input/parser.hpp | 117 +------------ single_include/nlohmann/json.hpp | 191 +++++++++------------ 3 files changed, 154 insertions(+), 228 deletions(-) diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index 06227e70..e2b8baad 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -275,5 +275,79 @@ class json_sax_dom_parser : public json_sax const bool allow_exceptions = true; }; +template +class json_sax_acceptor : public json_sax +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + + bool null() override + { + return true; + } + + bool 
boolean(bool) override + { + return true; + } + + bool number_integer(number_integer_t) override + { + return true; + } + + bool number_unsigned(number_unsigned_t) override + { + return true; + } + + bool number_float(number_float_t, const std::string&) override + { + return true; + } + + bool string(std::string&&) override + { + return true; + } + + bool start_object(std::size_t) override + { + return true; + } + + bool key(std::string&&) override + { + return true; + } + + bool end_object() override + { + return true; + } + + bool start_array(std::size_t) override + { + return true; + } + + bool end_array() override + { + return true; + } + + bool binary(const std::vector&) override + { + return true; + } + + bool parse_error(std::size_t, const std::string&, const std::string&) override + { + return false; + } +}; + } diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index 23fac952..5c0c2e94 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -137,7 +137,9 @@ class parser */ bool accept(const bool strict = true) { - if (not accept_internal()) + json_sax_acceptor sax_acceptor; + + if (not sax_parse_internal(&sax_acceptor)) { return false; } @@ -436,119 +438,6 @@ class parser } } - /*! - @brief the actual acceptor - - @invariant 1. The last token is not yet processed. Therefore, the caller - of this function must make sure a token has been read. - 2. When this function returns, the last token is processed. - That is, the last read character was already considered. - - This invariant makes sure that no token needs to be "unput". 
- */ - bool accept_internal() - { - switch (last_token) - { - case token_type::begin_object: - { - // read next token - get_token(); - - // closing } -> we are done - if (last_token == token_type::end_object) - { - return true; - } - - // parse values - while (true) - { - // parse key - if (last_token != token_type::value_string) - { - return false; - } - - // parse separator (:) - get_token(); - if (last_token != token_type::name_separator) - { - return false; - } - - // parse value - get_token(); - if (not accept_internal()) - { - return false; - } - - // comma -> next value - get_token(); - if (last_token == token_type::value_separator) - { - get_token(); - continue; - } - - // closing } - return (last_token == token_type::end_object); - } - } - - case token_type::begin_array: - { - // read next token - get_token(); - - // closing ] -> we are done - if (last_token == token_type::end_array) - { - return true; - } - - // parse values - while (true) - { - // parse value - if (not accept_internal()) - { - return false; - } - - // comma -> next value - get_token(); - if (last_token == token_type::value_separator) - { - get_token(); - continue; - } - - // closing ] - return (last_token == token_type::end_array); - } - } - - case token_type::value_float: - { - // reject infinity or NAN - return std::isfinite(m_lexer.get_number_float()); - } - - case token_type::literal_false: - case token_type::literal_null: - case token_type::literal_true: - case token_type::value_integer: - case token_type::value_string: - case token_type::value_unsigned: - return true; - - default: // the last token was unexpected - return false; - } - } - bool sax_parse_internal(json_sax_t* sax) { switch (last_token) diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 9fc301e2..ff992269 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3409,6 +3409,80 @@ class json_sax_dom_parser : public json_sax const bool allow_exceptions 
= true; }; +template +class json_sax_acceptor : public json_sax +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + + bool null() override + { + return true; + } + + bool boolean(bool) override + { + return true; + } + + bool number_integer(number_integer_t) override + { + return true; + } + + bool number_unsigned(number_unsigned_t) override + { + return true; + } + + bool number_float(number_float_t, const std::string&) override + { + return true; + } + + bool string(std::string&&) override + { + return true; + } + + bool start_object(std::size_t) override + { + return true; + } + + bool key(std::string&&) override + { + return true; + } + + bool end_object() override + { + return true; + } + + bool start_array(std::size_t) override + { + return true; + } + + bool end_array() override + { + return true; + } + + bool binary(const std::vector&) override + { + return true; + } + + bool parse_error(std::size_t, const std::string&, const std::string&) override + { + return false; + } +}; + } @@ -3540,7 +3614,9 @@ class parser */ bool accept(const bool strict = true) { - if (not accept_internal()) + json_sax_acceptor sax_acceptor; + + if (not sax_parse_internal(&sax_acceptor)) { return false; } @@ -3839,119 +3915,6 @@ class parser } } - /*! - @brief the actual acceptor - - @invariant 1. The last token is not yet processed. Therefore, the caller - of this function must make sure a token has been read. - 2. When this function returns, the last token is processed. - That is, the last read character was already considered. - - This invariant makes sure that no token needs to be "unput". 
- */ - bool accept_internal() - { - switch (last_token) - { - case token_type::begin_object: - { - // read next token - get_token(); - - // closing } -> we are done - if (last_token == token_type::end_object) - { - return true; - } - - // parse values - while (true) - { - // parse key - if (last_token != token_type::value_string) - { - return false; - } - - // parse separator (:) - get_token(); - if (last_token != token_type::name_separator) - { - return false; - } - - // parse value - get_token(); - if (not accept_internal()) - { - return false; - } - - // comma -> next value - get_token(); - if (last_token == token_type::value_separator) - { - get_token(); - continue; - } - - // closing } - return (last_token == token_type::end_object); - } - } - - case token_type::begin_array: - { - // read next token - get_token(); - - // closing ] -> we are done - if (last_token == token_type::end_array) - { - return true; - } - - // parse values - while (true) - { - // parse value - if (not accept_internal()) - { - return false; - } - - // comma -> next value - get_token(); - if (last_token == token_type::value_separator) - { - get_token(); - continue; - } - - // closing ] - return (last_token == token_type::end_array); - } - } - - case token_type::value_float: - { - // reject infinity or NAN - return std::isfinite(m_lexer.get_number_float()); - } - - case token_type::literal_false: - case token_type::literal_null: - case token_type::literal_true: - case token_type::value_integer: - case token_type::value_string: - case token_type::value_unsigned: - return true; - - default: // the last token was unexpected - return false; - } - } - bool sax_parse_internal(json_sax_t* sax) { switch (last_token) From 149d2fd09c050fe614703baa38a83fb624b490bc Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sat, 10 Mar 2018 11:24:00 +0100 Subject: [PATCH 13/43] :green_heart: improved test coverage --- include/nlohmann/detail/input/parser.hpp | 37 +++++++++--------------- 
single_include/nlohmann/json.hpp | 37 +++++++++--------------- test/src/unit-class_parser.cpp | 13 ++++++++- 3 files changed, 38 insertions(+), 49 deletions(-) diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index 8346cd4b..32f2be88 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -164,6 +164,8 @@ class parser { // never parse after a parse error was detected assert(not errored); + // this function is only called when a callback is given + assert(callback); // start with a discarded value if (not result.is_discarded()) @@ -178,12 +180,9 @@ class parser { if (keep) { - if (callback) - { - keep = callback(depth++, parse_event_t::object_start, result); - } + keep = callback(depth++, parse_event_t::object_start, result); - if (not callback or keep) + if (keep) { // explicitly set result to object to cope with {} result.m_type = value_t::object; @@ -197,7 +196,7 @@ class parser // closing } -> we are done if (last_token == token_type::end_object) { - if (keep and callback and not callback(--depth, parse_event_t::object_end, result)) + if (keep and not callback(--depth, parse_event_t::object_end, result)) { result.m_value.destroy(result.m_type); result.m_type = value_t::discarded; @@ -220,15 +219,8 @@ class parser bool keep_tag = false; if (keep) { - if (callback) - { - BasicJsonType k(key); - keep_tag = callback(depth, parse_event_t::key, k); - } - else - { - keep_tag = true; - } + BasicJsonType k(key); + keep_tag = callback(depth, parse_event_t::key, k); } // parse separator (:) @@ -270,7 +262,7 @@ class parser break; } - if (keep and callback and not callback(--depth, parse_event_t::object_end, result)) + if (keep and not callback(--depth, parse_event_t::object_end, result)) { result.m_value.destroy(result.m_type); result.m_type = value_t::discarded; @@ -282,12 +274,9 @@ class parser { if (keep) { - if (callback) - { - keep = callback(depth++, 
parse_event_t::array_start, result); - } + keep = callback(depth++, parse_event_t::array_start, result); - if (not callback or keep) + if (keep) { // explicitly set result to array to cope with [] result.m_type = value_t::array; @@ -301,7 +290,7 @@ class parser // closing ] -> we are done if (last_token == token_type::end_array) { - if (callback and not callback(--depth, parse_event_t::array_end, result)) + if (not callback(--depth, parse_event_t::array_end, result)) { result.m_value.destroy(result.m_type); result.m_type = value_t::discarded; @@ -344,7 +333,7 @@ class parser break; } - if (keep and callback and not callback(--depth, parse_event_t::array_end, result)) + if (keep and not callback(--depth, parse_event_t::array_end, result)) { result.m_value.destroy(result.m_type); result.m_type = value_t::discarded; @@ -432,7 +421,7 @@ class parser } } - if (keep and callback and not callback(depth, parse_event_t::value, result)) + if (keep and not callback(depth, parse_event_t::value, result)) { result.m_value.destroy(result.m_type); result.m_type = value_t::discarded; diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index cdd55475..666150d9 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3641,6 +3641,8 @@ class parser { // never parse after a parse error was detected assert(not errored); + // this function is only called when a callback is given + assert(callback); // start with a discarded value if (not result.is_discarded()) @@ -3655,12 +3657,9 @@ class parser { if (keep) { - if (callback) - { - keep = callback(depth++, parse_event_t::object_start, result); - } + keep = callback(depth++, parse_event_t::object_start, result); - if (not callback or keep) + if (keep) { // explicitly set result to object to cope with {} result.m_type = value_t::object; @@ -3674,7 +3673,7 @@ class parser // closing } -> we are done if (last_token == token_type::end_object) { - if (keep and callback and not 
callback(--depth, parse_event_t::object_end, result)) + if (keep and not callback(--depth, parse_event_t::object_end, result)) { result.m_value.destroy(result.m_type); result.m_type = value_t::discarded; @@ -3697,15 +3696,8 @@ class parser bool keep_tag = false; if (keep) { - if (callback) - { - BasicJsonType k(key); - keep_tag = callback(depth, parse_event_t::key, k); - } - else - { - keep_tag = true; - } + BasicJsonType k(key); + keep_tag = callback(depth, parse_event_t::key, k); } // parse separator (:) @@ -3747,7 +3739,7 @@ class parser break; } - if (keep and callback and not callback(--depth, parse_event_t::object_end, result)) + if (keep and not callback(--depth, parse_event_t::object_end, result)) { result.m_value.destroy(result.m_type); result.m_type = value_t::discarded; @@ -3759,12 +3751,9 @@ class parser { if (keep) { - if (callback) - { - keep = callback(depth++, parse_event_t::array_start, result); - } + keep = callback(depth++, parse_event_t::array_start, result); - if (not callback or keep) + if (keep) { // explicitly set result to array to cope with [] result.m_type = value_t::array; @@ -3778,7 +3767,7 @@ class parser // closing ] -> we are done if (last_token == token_type::end_array) { - if (callback and not callback(--depth, parse_event_t::array_end, result)) + if (not callback(--depth, parse_event_t::array_end, result)) { result.m_value.destroy(result.m_type); result.m_type = value_t::discarded; @@ -3821,7 +3810,7 @@ class parser break; } - if (keep and callback and not callback(--depth, parse_event_t::array_end, result)) + if (keep and not callback(--depth, parse_event_t::array_end, result)) { result.m_value.destroy(result.m_type); result.m_type = value_t::discarded; @@ -3909,7 +3898,7 @@ class parser } } - if (keep and callback and not callback(depth, parse_event_t::value, result)) + if (keep and not callback(depth, parse_event_t::value, result)) { result.m_value.destroy(result.m_type); result.m_type = value_t::discarded; diff --git 
a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index 39753d68..d84148eb 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -176,7 +176,18 @@ bool accept_helper(const std::string& s) CHECK_NOTHROW(json::sax_parse(s, &el)); CHECK(json::parser(nlohmann::detail::input_adapter(s)).accept(false) == not el.errored); - // 5. return result + // 5. parse with simple callback + json::parser_callback_t cb = [](int, json::parse_event_t, json&) + { + return true; + }; + json j_cb = json::parse(s, cb, false); + const bool ok_noexcept_cb = not j_cb.is_discarded(); + + // 6. check if this approach came to the same result + CHECK(ok_noexcept == ok_noexcept_cb); + + // 7. return result return ok_accept; } From ad47b0fbde0425838991da876adf40417110489d Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 11 Mar 2018 18:47:38 +0100 Subject: [PATCH 14/43] :recycle: refactored binary readers to use a SAX parser --- .../nlohmann/detail/input/binary_reader.hpp | 518 +++++++++-------- include/nlohmann/detail/input/json_sax.hpp | 21 +- include/nlohmann/json.hpp | 2 +- single_include/nlohmann/json.hpp | 542 ++++++++++-------- test/src/unit-class_parser.cpp | 2 +- 5 files changed, 592 insertions(+), 493 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index d4ca38f5..e7341352 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -16,6 +16,7 @@ #include // make_pair, move #include +#include #include #include #include @@ -37,6 +38,7 @@ class binary_reader using number_integer_t = typename BasicJsonType::number_integer_t; using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using string_t = typename BasicJsonType::string_t; + using json_sax_t = json_sax; public: /*! 
@@ -61,13 +63,16 @@ class binary_reader */ BasicJsonType parse_cbor(const bool strict) { - const auto res = parse_cbor_internal(); + BasicJsonType result; + json_sax_dom_parser sdp(result); + parse_cbor_sax_internal(&sdp); + result.assert_invariant(); if (strict) { get(); expect_eof(); } - return res; + return result; } /*! @@ -82,13 +87,16 @@ class binary_reader */ BasicJsonType parse_msgpack(const bool strict) { - const auto res = parse_msgpack_internal(); + BasicJsonType result; + json_sax_dom_parser sdp(result); + parse_msgpack_sax_internal(&sdp); + result.assert_invariant(); if (strict) { get(); expect_eof(); } - return res; + return result; } /*! @@ -103,13 +111,16 @@ class binary_reader */ BasicJsonType parse_ubjson(const bool strict) { - const auto res = parse_ubjson_internal(); + BasicJsonType result; + json_sax_dom_parser sdp(result); + parse_ubjson_sax_internal(&sdp); + result.assert_invariant(); if (strict) { get_ignore_noop(); expect_eof(); } - return res; + return result; } /*! @@ -130,13 +141,14 @@ class binary_reader input (true, default) or whether the last read character should be considered instead */ - BasicJsonType parse_cbor_internal(const bool get_char = true) + bool parse_cbor_sax_internal(json_sax_t* sax, const bool get_char = true) { switch (get_char ? 
get() : current) { // EOF case std::char_traits::eof(): - JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + unexpect_eof(); + return sax->parse_error(chars_read, "", "unexpected end of input"); // Integer 0x00..0x17 (0..23) case 0x00: @@ -163,19 +175,19 @@ class binary_reader case 0x15: case 0x16: case 0x17: - return static_cast(current); + return sax->number_unsigned(static_cast(current)); case 0x18: // Unsigned integer (one-byte uint8_t follows) - return get_number(); + return sax->number_unsigned(get_number()); case 0x19: // Unsigned integer (two-byte uint16_t follows) - return get_number(); + return sax->number_unsigned(get_number()); case 0x1A: // Unsigned integer (four-byte uint32_t follows) - return get_number(); + return sax->number_unsigned(get_number()); case 0x1B: // Unsigned integer (eight-byte uint64_t follows) - return get_number(); + return sax->number_unsigned(get_number()); // Negative integer -1-0x00..-1-0x17 (-1..-24) case 0x20: @@ -202,28 +214,20 @@ class binary_reader case 0x35: case 0x36: case 0x37: - return static_cast(0x20 - 1 - current); + return sax->number_integer(static_cast(0x20 - 1 - current)); case 0x38: // Negative integer (one-byte uint8_t follows) - { - return static_cast(-1) - get_number(); - } + return sax->number_integer(static_cast(-1) - get_number()); case 0x39: // Negative integer -1-n (two-byte uint16_t follows) - { - return static_cast(-1) - get_number(); - } + return sax->number_integer(static_cast(-1) - get_number()); case 0x3A: // Negative integer -1-n (four-byte uint32_t follows) - { - return static_cast(-1) - get_number(); - } + return sax->number_integer(static_cast(-1) - get_number()); case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows) - { - return static_cast(-1) - - static_cast(get_number()); - } + return sax->number_integer(static_cast(-1) - + static_cast(get_number())); // UTF-8 string (0x00..0x17 bytes follow) case 0x60: @@ -255,9 +259,7 @@ class binary_reader case 
0x7A: // UTF-8 string (four-byte uint32_t for n follow) case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) case 0x7F: // UTF-8 string (indefinite length) - { - return get_cbor_string(); - } + return sax->string(get_cbor_string()); // array (0x00..0x17 data items follow) case 0x80: @@ -284,39 +286,22 @@ class binary_reader case 0x95: case 0x96: case 0x97: - { - return get_cbor_array(current & 0x1F); - } + return get_cbor_array_sax(sax, static_cast(current & 0x1F)); case 0x98: // array (one-byte uint8_t for n follows) - { - return get_cbor_array(get_number()); - } + return get_cbor_array_sax(sax, static_cast(get_number())); case 0x99: // array (two-byte uint16_t for n follow) - { - return get_cbor_array(get_number()); - } + return get_cbor_array_sax(sax, static_cast(get_number())); case 0x9A: // array (four-byte uint32_t for n follow) - { - return get_cbor_array(get_number()); - } + return get_cbor_array_sax(sax, static_cast(get_number())); case 0x9B: // array (eight-byte uint64_t for n follow) - { - return get_cbor_array(get_number()); - } + return get_cbor_array_sax(sax, static_cast(get_number())); case 0x9F: // array (indefinite length) - { - BasicJsonType result = value_t::array; - while (get() != 0xFF) - { - result.push_back(parse_cbor_internal(false)); - } - return result; - } + return get_cbor_array_sax(sax, json_sax_t::no_limit); // map (0x00..0x17 pairs of data items follow) case 0xA0: @@ -343,55 +328,31 @@ class binary_reader case 0xB5: case 0xB6: case 0xB7: - { - return get_cbor_object(current & 0x1F); - } + return get_cbor_object_sax(sax, static_cast(current & 0x1F)); case 0xB8: // map (one-byte uint8_t for n follows) - { - return get_cbor_object(get_number()); - } + return get_cbor_object_sax(sax, static_cast(get_number())); case 0xB9: // map (two-byte uint16_t for n follow) - { - return get_cbor_object(get_number()); - } + return get_cbor_object_sax(sax, static_cast(get_number())); case 0xBA: // map (four-byte uint32_t for n follow) - { - 
return get_cbor_object(get_number()); - } + return get_cbor_object_sax(sax, static_cast(get_number())); case 0xBB: // map (eight-byte uint64_t for n follow) - { - return get_cbor_object(get_number()); - } + return get_cbor_object_sax(sax, static_cast(get_number())); case 0xBF: // map (indefinite length) - { - BasicJsonType result = value_t::object; - while (get() != 0xFF) - { - auto key = get_cbor_string(); - result[key] = parse_cbor_internal(); - } - return result; - } + return get_cbor_object_sax(sax, json_sax_t::no_limit); case 0xF4: // false - { - return false; - } + return sax->boolean(false); case 0xF5: // true - { - return true; - } + return sax->boolean(true); case 0xF6: // null - { - return value_t::null; - } + return sax->null(); case 0xF9: // Half-Precision Float (two-byte IEEE 754) { @@ -425,18 +386,14 @@ class binary_reader val = (mant == 0) ? std::numeric_limits::infinity() : std::numeric_limits::quiet_NaN(); } - return (half & 0x8000) != 0 ? -val : val; + return sax->number_float((half & 0x8000) != 0 ? 
-val : val, ""); } case 0xFA: // Single-Precision Float (four-byte IEEE 754) - { - return get_number(); - } + return sax->number_float(static_cast(get_number()), ""); case 0xFB: // Double-Precision Float (eight-byte IEEE 754) - { - return get_number(); - } + return sax->number_float(get_number(), ""); default: // anything else (0xFF is handled inside the other types) { @@ -447,13 +404,14 @@ class binary_reader } } - BasicJsonType parse_msgpack_internal() + bool parse_msgpack_sax_internal(json_sax_t* sax) { switch (get()) { // EOF case std::char_traits::eof(): - JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + unexpect_eof(); + return sax->parse_error(chars_read, "", "unexpected end of input"); // positive fixint case 0x00: @@ -584,7 +542,7 @@ class binary_reader case 0x7D: case 0x7E: case 0x7F: - return static_cast(current); + return sax->number_unsigned(static_cast(current)); // fixmap case 0x80: @@ -603,9 +561,7 @@ class binary_reader case 0x8D: case 0x8E: case 0x8F: - { - return get_msgpack_object(current & 0x0F); - } + return get_msgpack_object_sax(sax, static_cast(current & 0x0F)); // fixarray case 0x90: @@ -624,9 +580,7 @@ class binary_reader case 0x9D: case 0x9E: case 0x9F: - { - return get_msgpack_array(current & 0x0F); - } + return get_msgpack_array_sax(sax, static_cast(current & 0x0F)); // fixstr case 0xA0: @@ -661,73 +615,65 @@ class binary_reader case 0xBD: case 0xBE: case 0xBF: - return get_msgpack_string(); + return sax->string(get_msgpack_string()); case 0xC0: // nil - return value_t::null; + return sax->null(); case 0xC2: // false - return false; + return sax->boolean(false); case 0xC3: // true - return true; + return sax->boolean(true); case 0xCA: // float 32 - return get_number(); + return sax->number_float(static_cast(get_number()), ""); case 0xCB: // float 64 - return get_number(); + return sax->number_float(get_number(), ""); case 0xCC: // uint 8 - return get_number(); + return sax->number_unsigned(get_number()); 
case 0xCD: // uint 16 - return get_number(); + return sax->number_unsigned(get_number()); case 0xCE: // uint 32 - return get_number(); + return sax->number_unsigned(get_number()); case 0xCF: // uint 64 - return get_number(); + return sax->number_unsigned(get_number()); case 0xD0: // int 8 - return get_number(); + return sax->number_integer(get_number()); case 0xD1: // int 16 - return get_number(); + return sax->number_integer(get_number()); case 0xD2: // int 32 - return get_number(); + return sax->number_integer(get_number()); case 0xD3: // int 64 - return get_number(); + return sax->number_integer(get_number()); case 0xD9: // str 8 case 0xDA: // str 16 case 0xDB: // str 32 - return get_msgpack_string(); + return sax->string(get_msgpack_string()); case 0xDC: // array 16 - { - return get_msgpack_array(get_number()); - } + return get_msgpack_array_sax(sax, static_cast(get_number())); case 0xDD: // array 32 - { - return get_msgpack_array(get_number()); - } + return get_msgpack_array_sax(sax, static_cast(get_number())); case 0xDE: // map 16 - { - return get_msgpack_object(get_number()); - } + return get_msgpack_object_sax(sax, static_cast(get_number())); case 0xDF: // map 32 - { - return get_msgpack_object(get_number()); - } + return get_msgpack_object_sax(sax, static_cast(get_number())); - // positive fixint + // negative fixint case 0xE0: case 0xE1: case 0xE2: @@ -760,7 +706,7 @@ class binary_reader case 0xFD: case 0xFE: case 0xFF: - return static_cast(current); + return sax->number_integer(static_cast(current)); default: // anything else { @@ -777,9 +723,9 @@ class binary_reader input (true, default) or whether the last read character should be considered instead */ - BasicJsonType parse_ubjson_internal(const bool get_char = true) + bool parse_ubjson_sax_internal(json_sax_t* sax, const bool get_char = true) { - return get_ubjson_value(get_char ? get_ignore_noop() : current); + return get_ubjson_sax_value(sax, get_char ? get_ignore_noop() : current); } /*! 
@@ -919,29 +865,19 @@ class binary_reader case 0x75: case 0x76: case 0x77: - { return get_string(current & 0x1F); - } case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - { return get_string(get_number()); - } case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - { return get_string(get_number()); - } case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) - { return get_string(get_number()); - } case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) - { return get_string(get_number()); - } case 0x7F: // UTF-8 string (indefinite length) { @@ -962,31 +898,75 @@ class binary_reader } } - template - BasicJsonType get_cbor_array(const NumberType len) + bool get_cbor_array_sax(json_sax_t* sax, const std::size_t len) { - BasicJsonType result = value_t::array; - std::generate_n(std::back_inserter(*result.m_value.array), len, [this]() + if (not sax->start_array(len)) { - return parse_cbor_internal(); - }); - return result; + return false; + } + + if (len != json_sax_t::no_limit) + for (std::size_t i = 0; i < len; ++i) + { + if (not parse_cbor_sax_internal(sax)) + { + return false; + } + } + else + { + while (get() != 0xFF) + { + if (not parse_cbor_sax_internal(sax, false)) + { + return false; + } + } + } + + return sax->end_array(); } - template - BasicJsonType get_cbor_object(const NumberType len) + bool get_cbor_object_sax(json_sax_t* sax, const std::size_t len) { - BasicJsonType result = value_t::object; - std::generate_n(std::inserter(*result.m_value.object, - result.m_value.object->end()), - len, [this]() + if (not sax->start_object(len)) { - get(); - auto key = get_cbor_string(); - auto val = parse_cbor_internal(); - return std::make_pair(std::move(key), std::move(val)); - }); - return result; + return false; + } + + if (len != json_sax_t::no_limit) + { + for (std::size_t i = 0; i < len; ++i) + { + get(); + if (not sax->key(get_cbor_string())) + { + return false; + } + + if (not parse_cbor_sax_internal(sax)) + { + return false; + } + } + 
} + else + { + while (get() != 0xFF) + { + if (not sax->key(get_cbor_string())) + { + return false; + } + + if (not parse_cbor_sax_internal(sax)) + { + return false; + } + } + } + + return sax->end_object(); } /*! @@ -1039,24 +1019,16 @@ class binary_reader case 0xBD: case 0xBE: case 0xBF: - { return get_string(current & 0x1F); - } case 0xD9: // str 8 - { return get_string(get_number()); - } case 0xDA: // str 16 - { return get_string(get_number()); - } case 0xDB: // str 32 - { return get_string(get_number()); - } default: { @@ -1068,31 +1040,46 @@ class binary_reader } } - template - BasicJsonType get_msgpack_array(const NumberType len) + bool get_msgpack_array_sax(json_sax_t* sax, const std::size_t len) { - BasicJsonType result = value_t::array; - std::generate_n(std::back_inserter(*result.m_value.array), len, [this]() + if (not sax->start_array(len)) { - return parse_msgpack_internal(); - }); - return result; + return false; + } + + for (std::size_t i = 0; i < len; ++i) + { + if (not parse_msgpack_sax_internal(sax)) + { + return false; + } + } + + return sax->end_array(); } - template - BasicJsonType get_msgpack_object(const NumberType len) + bool get_msgpack_object_sax(json_sax_t* sax, const std::size_t len) { - BasicJsonType result = value_t::object; - std::generate_n(std::inserter(*result.m_value.object, - result.m_value.object->end()), - len, [this]() + if (not sax->start_object(len)) + { + return false; + } + + for (std::size_t i = 0; i < len; ++i) { get(); - auto key = get_msgpack_string(); - auto val = parse_msgpack_internal(); - return std::make_pair(std::move(key), std::move(val)); - }); - return result; + if (not sax->key(get_msgpack_string())) + { + return false; + } + + if (not parse_msgpack_sax_internal(sax)) + { + return false; + } + } + + return sax->end_object(); } /*! 
@@ -1140,6 +1127,25 @@ class binary_reader } } + std::size_t get_ubjson_size_value() + { + switch (get_ignore_noop()) + { + case 'U': + return static_cast(get_number()); + case 'i': + return static_cast(get_number()); + case 'I': + return static_cast(get_number()); + case 'l': + return static_cast(get_number()); + case 'L': + return static_cast(get_number()); + default: + return std::size_t(-1); + } + } + /*! @brief determine the type and size for a container @@ -1168,45 +1174,46 @@ class binary_reader JSON_THROW(parse_error::create(112, chars_read, "expected '#' after UBJSON type information; last byte: 0x" + ss.str())); } - sz = parse_ubjson_internal(); + sz = get_ubjson_size_value(); } else if (current == '#') { - sz = parse_ubjson_internal(); + sz = get_ubjson_size_value(); } return std::make_pair(sz, tc); } - BasicJsonType get_ubjson_value(const int prefix) + bool get_ubjson_sax_value(json_sax_t* sax, const int prefix) { switch (prefix) { case std::char_traits::eof(): // EOF - JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + unexpect_eof(); + return sax->parse_error(chars_read, "", "unexpected end of input"); case 'T': // true - return true; + return sax->boolean(true); case 'F': // false - return false; + return sax->boolean(false); case 'Z': // null - return nullptr; + return sax->null(); case 'U': - return get_number(); + return sax->number_unsigned(get_number()); case 'i': - return get_number(); + return sax->number_integer(get_number()); case 'I': - return get_number(); + return sax->number_integer(get_number()); case 'l': - return get_number(); + return sax->number_integer(get_number()); case 'L': - return get_number(); + return sax->number_integer(get_number()); case 'd': - return get_number(); + return sax->number_float(static_cast(get_number()), ""); case 'D': - return get_number(); + return sax->number_float(get_number(), ""); case 'C': // char { @@ -1219,17 +1226,17 @@ class binary_reader 
JSON_THROW(parse_error::create(113, chars_read, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + ss.str())); } - return string_t(1, static_cast(current)); + return sax->string(string_t(1, static_cast(current))); } case 'S': // string - return get_ubjson_string(); + return sax->string(get_ubjson_string()); case '[': // array - return get_ubjson_array(); + return get_ubjson_sax_array(sax); case '{': // object - return get_ubjson_object(); + return get_ubjson_sax_object(sax); default: // anything else std::stringstream ss; @@ -1239,98 +1246,123 @@ class binary_reader } } - BasicJsonType get_ubjson_array() + bool get_ubjson_sax_array(json_sax_t* sax) { - BasicJsonType result = value_t::array; const auto size_and_type = get_ubjson_size_type(); if (size_and_type.first != string_t::npos) { - if (JSON_UNLIKELY(size_and_type.first > result.max_size())) + if (not sax->start_array(size_and_type.first)) { - JSON_THROW(out_of_range::create(408, - "excessive array size: " + std::to_string(size_and_type.first))); + return false; } if (size_and_type.second != 0) { if (size_and_type.second != 'N') { - std::generate_n(std::back_inserter(*result.m_value.array), - size_and_type.first, [this, size_and_type]() + for (std::size_t i = 0; i < size_and_type.first; ++i) { - return get_ubjson_value(size_and_type.second); - }); + if (not get_ubjson_sax_value(sax, size_and_type.second)) + { + return false; + } + } } } else { - std::generate_n(std::back_inserter(*result.m_value.array), - size_and_type.first, [this]() + for (std::size_t i = 0; i < size_and_type.first; ++i) { - return parse_ubjson_internal(); - }); + if (not parse_ubjson_sax_internal(sax)) + { + return false; + } + } } } else { + if (not sax->start_array()) + { + return false; + } + while (current != ']') { - result.push_back(parse_ubjson_internal(false)); + if (not parse_ubjson_sax_internal(sax, false)) + { + return false; + } get_ignore_noop(); } } - return result; + return sax->end_array(); } - BasicJsonType 
get_ubjson_object() + bool get_ubjson_sax_object(json_sax_t* sax) { - BasicJsonType result = value_t::object; const auto size_and_type = get_ubjson_size_type(); if (size_and_type.first != string_t::npos) { - if (JSON_UNLIKELY(size_and_type.first > result.max_size())) + if (not sax->start_object(size_and_type.first)) { - JSON_THROW(out_of_range::create(408, - "excessive object size: " + std::to_string(size_and_type.first))); + return false; } if (size_and_type.second != 0) { - std::generate_n(std::inserter(*result.m_value.object, - result.m_value.object->end()), - size_and_type.first, [this, size_and_type]() + for (std::size_t i = 0; i < size_and_type.first; ++i) { - auto key = get_ubjson_string(); - auto val = get_ubjson_value(size_and_type.second); - return std::make_pair(std::move(key), std::move(val)); - }); + if (not sax->key(get_ubjson_string())) + { + return false; + } + if (not get_ubjson_sax_value(sax, size_and_type.second)) + { + return false; + } + } } else { - std::generate_n(std::inserter(*result.m_value.object, - result.m_value.object->end()), - size_and_type.first, [this]() + for (std::size_t i = 0; i < size_and_type.first; ++i) { - auto key = get_ubjson_string(); - auto val = parse_ubjson_internal(); - return std::make_pair(std::move(key), std::move(val)); - }); + if (not sax->key(get_ubjson_string())) + { + return false; + } + if (not parse_ubjson_sax_internal(sax)) + { + return false; + } + } } } else { + if (not sax->start_object()) + { + return false; + } + while (current != '}') { - auto key = get_ubjson_string(false); - result[std::move(key)] = parse_ubjson_internal(); + if (not sax->key(get_ubjson_string(false))) + { + return false; + } + if (not parse_ubjson_sax_internal(sax)) + { + return false; + } get_ignore_noop(); } } - return result; + return sax->end_object(); } /*! 
diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index e2b8baad..bb98e785 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -124,6 +124,8 @@ struct json_sax }; +namespace detail +{ template class json_sax_dom_parser : public json_sax { @@ -172,9 +174,16 @@ class json_sax_dom_parser : public json_sax return true; } - bool start_object(std::size_t) override + bool start_object(std::size_t len) override { ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); + + if (JSON_UNLIKELY(len != json_sax::no_limit and len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, + "excessive object size: " + std::to_string(len))); + } + return true; } @@ -191,9 +200,16 @@ class json_sax_dom_parser : public json_sax return true; } - bool start_array(std::size_t) override + bool start_array(std::size_t len) override { ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); + + if (JSON_UNLIKELY(len != json_sax::no_limit and len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, + "excessive array size: " + std::to_string(len))); + } + return true; } @@ -348,6 +364,7 @@ class json_sax_acceptor : public json_sax return false; } }; +} } diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index 3a52ec8c..5a5f1cfe 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -172,7 +172,7 @@ class basic_json template friend class ::nlohmann::detail::binary_reader; template - friend class ::nlohmann::json_sax_dom_parser; + friend class ::nlohmann::detail::json_sax_dom_parser; /// workaround type for MSVC using basic_json_t = NLOHMANN_BASIC_JSON_TPL; diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 666150d9..bda79b26 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3258,6 +3258,8 @@ struct json_sax }; 
+namespace detail +{ template class json_sax_dom_parser : public json_sax { @@ -3306,9 +3308,16 @@ class json_sax_dom_parser : public json_sax return true; } - bool start_object(std::size_t) override + bool start_object(std::size_t len) override { ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); + + if (JSON_UNLIKELY(len != json_sax::no_limit and len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, + "excessive object size: " + std::to_string(len))); + } + return true; } @@ -3325,9 +3334,16 @@ class json_sax_dom_parser : public json_sax return true; } - bool start_array(std::size_t) override + bool start_array(std::size_t len) override { ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); + + if (JSON_UNLIKELY(len != json_sax::no_limit and len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, + "excessive array size: " + std::to_string(len))); + } + return true; } @@ -3482,6 +3498,7 @@ class json_sax_acceptor : public json_sax return false; } }; +} } @@ -5285,6 +5302,8 @@ class output_adapter // #include +// #include + // #include // #include @@ -5309,6 +5328,7 @@ class binary_reader using number_integer_t = typename BasicJsonType::number_integer_t; using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using string_t = typename BasicJsonType::string_t; + using json_sax_t = json_sax; public: /*! @@ -5333,13 +5353,16 @@ class binary_reader */ BasicJsonType parse_cbor(const bool strict) { - const auto res = parse_cbor_internal(); + BasicJsonType result; + json_sax_dom_parser sdp(result); + parse_cbor_sax_internal(&sdp); + result.assert_invariant(); if (strict) { get(); expect_eof(); } - return res; + return result; } /*! 
@@ -5354,13 +5377,16 @@ class binary_reader */ BasicJsonType parse_msgpack(const bool strict) { - const auto res = parse_msgpack_internal(); + BasicJsonType result; + json_sax_dom_parser sdp(result); + parse_msgpack_sax_internal(&sdp); + result.assert_invariant(); if (strict) { get(); expect_eof(); } - return res; + return result; } /*! @@ -5375,13 +5401,16 @@ class binary_reader */ BasicJsonType parse_ubjson(const bool strict) { - const auto res = parse_ubjson_internal(); + BasicJsonType result; + json_sax_dom_parser sdp(result); + parse_ubjson_sax_internal(&sdp); + result.assert_invariant(); if (strict) { get_ignore_noop(); expect_eof(); } - return res; + return result; } /*! @@ -5402,13 +5431,14 @@ class binary_reader input (true, default) or whether the last read character should be considered instead */ - BasicJsonType parse_cbor_internal(const bool get_char = true) + bool parse_cbor_sax_internal(json_sax_t* sax, const bool get_char = true) { switch (get_char ? get() : current) { // EOF case std::char_traits::eof(): - JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + unexpect_eof(); + return sax->parse_error(chars_read, "", "unexpected end of input"); // Integer 0x00..0x17 (0..23) case 0x00: @@ -5435,19 +5465,19 @@ class binary_reader case 0x15: case 0x16: case 0x17: - return static_cast(current); + return sax->number_unsigned(static_cast(current)); case 0x18: // Unsigned integer (one-byte uint8_t follows) - return get_number(); + return sax->number_unsigned(get_number()); case 0x19: // Unsigned integer (two-byte uint16_t follows) - return get_number(); + return sax->number_unsigned(get_number()); case 0x1A: // Unsigned integer (four-byte uint32_t follows) - return get_number(); + return sax->number_unsigned(get_number()); case 0x1B: // Unsigned integer (eight-byte uint64_t follows) - return get_number(); + return sax->number_unsigned(get_number()); // Negative integer -1-0x00..-1-0x17 (-1..-24) case 0x20: @@ -5474,28 +5504,20 @@ 
class binary_reader case 0x35: case 0x36: case 0x37: - return static_cast(0x20 - 1 - current); + return sax->number_integer(static_cast(0x20 - 1 - current)); case 0x38: // Negative integer (one-byte uint8_t follows) - { - return static_cast(-1) - get_number(); - } + return sax->number_integer(static_cast(-1) - get_number()); case 0x39: // Negative integer -1-n (two-byte uint16_t follows) - { - return static_cast(-1) - get_number(); - } + return sax->number_integer(static_cast(-1) - get_number()); case 0x3A: // Negative integer -1-n (four-byte uint32_t follows) - { - return static_cast(-1) - get_number(); - } + return sax->number_integer(static_cast(-1) - get_number()); case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows) - { - return static_cast(-1) - - static_cast(get_number()); - } + return sax->number_integer(static_cast(-1) - + static_cast(get_number())); // UTF-8 string (0x00..0x17 bytes follow) case 0x60: @@ -5527,9 +5549,7 @@ class binary_reader case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) case 0x7F: // UTF-8 string (indefinite length) - { - return get_cbor_string(); - } + return sax->string(get_cbor_string()); // array (0x00..0x17 data items follow) case 0x80: @@ -5556,39 +5576,22 @@ class binary_reader case 0x95: case 0x96: case 0x97: - { - return get_cbor_array(current & 0x1F); - } + return get_cbor_array_sax(sax, static_cast(current & 0x1F)); case 0x98: // array (one-byte uint8_t for n follows) - { - return get_cbor_array(get_number()); - } + return get_cbor_array_sax(sax, static_cast(get_number())); case 0x99: // array (two-byte uint16_t for n follow) - { - return get_cbor_array(get_number()); - } + return get_cbor_array_sax(sax, static_cast(get_number())); case 0x9A: // array (four-byte uint32_t for n follow) - { - return get_cbor_array(get_number()); - } + return get_cbor_array_sax(sax, static_cast(get_number())); case 0x9B: // array (eight-byte uint64_t for n 
follow) - { - return get_cbor_array(get_number()); - } + return get_cbor_array_sax(sax, static_cast(get_number())); case 0x9F: // array (indefinite length) - { - BasicJsonType result = value_t::array; - while (get() != 0xFF) - { - result.push_back(parse_cbor_internal(false)); - } - return result; - } + return get_cbor_array_sax(sax, json_sax_t::no_limit); // map (0x00..0x17 pairs of data items follow) case 0xA0: @@ -5615,55 +5618,31 @@ class binary_reader case 0xB5: case 0xB6: case 0xB7: - { - return get_cbor_object(current & 0x1F); - } + return get_cbor_object_sax(sax, static_cast(current & 0x1F)); case 0xB8: // map (one-byte uint8_t for n follows) - { - return get_cbor_object(get_number()); - } + return get_cbor_object_sax(sax, static_cast(get_number())); case 0xB9: // map (two-byte uint16_t for n follow) - { - return get_cbor_object(get_number()); - } + return get_cbor_object_sax(sax, static_cast(get_number())); case 0xBA: // map (four-byte uint32_t for n follow) - { - return get_cbor_object(get_number()); - } + return get_cbor_object_sax(sax, static_cast(get_number())); case 0xBB: // map (eight-byte uint64_t for n follow) - { - return get_cbor_object(get_number()); - } + return get_cbor_object_sax(sax, static_cast(get_number())); case 0xBF: // map (indefinite length) - { - BasicJsonType result = value_t::object; - while (get() != 0xFF) - { - auto key = get_cbor_string(); - result[key] = parse_cbor_internal(); - } - return result; - } + return get_cbor_object_sax(sax, json_sax_t::no_limit); case 0xF4: // false - { - return false; - } + return sax->boolean(false); case 0xF5: // true - { - return true; - } + return sax->boolean(true); case 0xF6: // null - { - return value_t::null; - } + return sax->null(); case 0xF9: // Half-Precision Float (two-byte IEEE 754) { @@ -5697,18 +5676,14 @@ class binary_reader val = (mant == 0) ? std::numeric_limits::infinity() : std::numeric_limits::quiet_NaN(); } - return (half & 0x8000) != 0 ? 
-val : val; + return sax->number_float((half & 0x8000) != 0 ? -val : val, ""); } case 0xFA: // Single-Precision Float (four-byte IEEE 754) - { - return get_number(); - } + return sax->number_float(static_cast(get_number()), ""); case 0xFB: // Double-Precision Float (eight-byte IEEE 754) - { - return get_number(); - } + return sax->number_float(get_number(), ""); default: // anything else (0xFF is handled inside the other types) { @@ -5719,13 +5694,14 @@ class binary_reader } } - BasicJsonType parse_msgpack_internal() + bool parse_msgpack_sax_internal(json_sax_t* sax) { switch (get()) { // EOF case std::char_traits::eof(): - JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + unexpect_eof(); + return sax->parse_error(chars_read, "", "unexpected end of input"); // positive fixint case 0x00: @@ -5856,7 +5832,7 @@ class binary_reader case 0x7D: case 0x7E: case 0x7F: - return static_cast(current); + return sax->number_unsigned(static_cast(current)); // fixmap case 0x80: @@ -5875,9 +5851,7 @@ class binary_reader case 0x8D: case 0x8E: case 0x8F: - { - return get_msgpack_object(current & 0x0F); - } + return get_msgpack_object_sax(sax, static_cast(current & 0x0F)); // fixarray case 0x90: @@ -5896,9 +5870,7 @@ class binary_reader case 0x9D: case 0x9E: case 0x9F: - { - return get_msgpack_array(current & 0x0F); - } + return get_msgpack_array_sax(sax, static_cast(current & 0x0F)); // fixstr case 0xA0: @@ -5933,73 +5905,65 @@ class binary_reader case 0xBD: case 0xBE: case 0xBF: - return get_msgpack_string(); + return sax->string(get_msgpack_string()); case 0xC0: // nil - return value_t::null; + return sax->null(); case 0xC2: // false - return false; + return sax->boolean(false); case 0xC3: // true - return true; + return sax->boolean(true); case 0xCA: // float 32 - return get_number(); + return sax->number_float(static_cast(get_number()), ""); case 0xCB: // float 64 - return get_number(); + return sax->number_float(get_number(), ""); case 0xCC: // uint 
8 - return get_number(); + return sax->number_unsigned(get_number()); case 0xCD: // uint 16 - return get_number(); + return sax->number_unsigned(get_number()); case 0xCE: // uint 32 - return get_number(); + return sax->number_unsigned(get_number()); case 0xCF: // uint 64 - return get_number(); + return sax->number_unsigned(get_number()); case 0xD0: // int 8 - return get_number(); + return sax->number_integer(get_number()); case 0xD1: // int 16 - return get_number(); + return sax->number_integer(get_number()); case 0xD2: // int 32 - return get_number(); + return sax->number_integer(get_number()); case 0xD3: // int 64 - return get_number(); + return sax->number_integer(get_number()); case 0xD9: // str 8 case 0xDA: // str 16 case 0xDB: // str 32 - return get_msgpack_string(); + return sax->string(get_msgpack_string()); case 0xDC: // array 16 - { - return get_msgpack_array(get_number()); - } + return get_msgpack_array_sax(sax, static_cast(get_number())); case 0xDD: // array 32 - { - return get_msgpack_array(get_number()); - } + return get_msgpack_array_sax(sax, static_cast(get_number())); case 0xDE: // map 16 - { - return get_msgpack_object(get_number()); - } + return get_msgpack_object_sax(sax, static_cast(get_number())); case 0xDF: // map 32 - { - return get_msgpack_object(get_number()); - } + return get_msgpack_object_sax(sax, static_cast(get_number())); - // positive fixint + // negative fixint case 0xE0: case 0xE1: case 0xE2: @@ -6032,7 +5996,7 @@ class binary_reader case 0xFD: case 0xFE: case 0xFF: - return static_cast(current); + return sax->number_integer(static_cast(current)); default: // anything else { @@ -6049,9 +6013,9 @@ class binary_reader input (true, default) or whether the last read character should be considered instead */ - BasicJsonType parse_ubjson_internal(const bool get_char = true) + bool parse_ubjson_sax_internal(json_sax_t* sax, const bool get_char = true) { - return get_ubjson_value(get_char ? 
get_ignore_noop() : current); + return get_ubjson_sax_value(sax, get_char ? get_ignore_noop() : current); } /*! @@ -6191,29 +6155,19 @@ class binary_reader case 0x75: case 0x76: case 0x77: - { return get_string(current & 0x1F); - } case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - { return get_string(get_number()); - } case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - { return get_string(get_number()); - } case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) - { return get_string(get_number()); - } case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) - { return get_string(get_number()); - } case 0x7F: // UTF-8 string (indefinite length) { @@ -6234,31 +6188,75 @@ class binary_reader } } - template - BasicJsonType get_cbor_array(const NumberType len) + bool get_cbor_array_sax(json_sax_t* sax, const std::size_t len) { - BasicJsonType result = value_t::array; - std::generate_n(std::back_inserter(*result.m_value.array), len, [this]() + if (not sax->start_array(len)) { - return parse_cbor_internal(); - }); - return result; + return false; + } + + if (len != json_sax_t::no_limit) + for (std::size_t i = 0; i < len; ++i) + { + if (not parse_cbor_sax_internal(sax)) + { + return false; + } + } + else + { + while (get() != 0xFF) + { + if (not parse_cbor_sax_internal(sax, false)) + { + return false; + } + } + } + + return sax->end_array(); } - template - BasicJsonType get_cbor_object(const NumberType len) + bool get_cbor_object_sax(json_sax_t* sax, const std::size_t len) { - BasicJsonType result = value_t::object; - std::generate_n(std::inserter(*result.m_value.object, - result.m_value.object->end()), - len, [this]() + if (not sax->start_object(len)) { - get(); - auto key = get_cbor_string(); - auto val = parse_cbor_internal(); - return std::make_pair(std::move(key), std::move(val)); - }); - return result; + return false; + } + + if (len != json_sax_t::no_limit) + { + for (std::size_t i = 0; i < len; ++i) + { + get(); + if (not 
sax->key(get_cbor_string())) + { + return false; + } + + if (not parse_cbor_sax_internal(sax)) + { + return false; + } + } + } + else + { + while (get() != 0xFF) + { + if (not sax->key(get_cbor_string())) + { + return false; + } + + if (not parse_cbor_sax_internal(sax)) + { + return false; + } + } + } + + return sax->end_object(); } /*! @@ -6311,24 +6309,16 @@ class binary_reader case 0xBD: case 0xBE: case 0xBF: - { return get_string(current & 0x1F); - } case 0xD9: // str 8 - { return get_string(get_number()); - } case 0xDA: // str 16 - { return get_string(get_number()); - } case 0xDB: // str 32 - { return get_string(get_number()); - } default: { @@ -6340,31 +6330,46 @@ class binary_reader } } - template - BasicJsonType get_msgpack_array(const NumberType len) + bool get_msgpack_array_sax(json_sax_t* sax, const std::size_t len) { - BasicJsonType result = value_t::array; - std::generate_n(std::back_inserter(*result.m_value.array), len, [this]() + if (not sax->start_array(len)) { - return parse_msgpack_internal(); - }); - return result; + return false; + } + + for (std::size_t i = 0; i < len; ++i) + { + if (not parse_msgpack_sax_internal(sax)) + { + return false; + } + } + + return sax->end_array(); } - template - BasicJsonType get_msgpack_object(const NumberType len) + bool get_msgpack_object_sax(json_sax_t* sax, const std::size_t len) { - BasicJsonType result = value_t::object; - std::generate_n(std::inserter(*result.m_value.object, - result.m_value.object->end()), - len, [this]() + if (not sax->start_object(len)) + { + return false; + } + + for (std::size_t i = 0; i < len; ++i) { get(); - auto key = get_msgpack_string(); - auto val = parse_msgpack_internal(); - return std::make_pair(std::move(key), std::move(val)); - }); - return result; + if (not sax->key(get_msgpack_string())) + { + return false; + } + + if (not parse_msgpack_sax_internal(sax)) + { + return false; + } + } + + return sax->end_object(); } /*! 
@@ -6412,6 +6417,25 @@ class binary_reader } } + std::size_t get_ubjson_size_value() + { + switch (get_ignore_noop()) + { + case 'U': + return static_cast(get_number()); + case 'i': + return static_cast(get_number()); + case 'I': + return static_cast(get_number()); + case 'l': + return static_cast(get_number()); + case 'L': + return static_cast(get_number()); + default: + return std::size_t(-1); + } + } + /*! @brief determine the type and size for a container @@ -6440,45 +6464,46 @@ class binary_reader JSON_THROW(parse_error::create(112, chars_read, "expected '#' after UBJSON type information; last byte: 0x" + ss.str())); } - sz = parse_ubjson_internal(); + sz = get_ubjson_size_value(); } else if (current == '#') { - sz = parse_ubjson_internal(); + sz = get_ubjson_size_value(); } return std::make_pair(sz, tc); } - BasicJsonType get_ubjson_value(const int prefix) + bool get_ubjson_sax_value(json_sax_t* sax, const int prefix) { switch (prefix) { case std::char_traits::eof(): // EOF - JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + unexpect_eof(); + return sax->parse_error(chars_read, "", "unexpected end of input"); case 'T': // true - return true; + return sax->boolean(true); case 'F': // false - return false; + return sax->boolean(false); case 'Z': // null - return nullptr; + return sax->null(); case 'U': - return get_number(); + return sax->number_unsigned(get_number()); case 'i': - return get_number(); + return sax->number_integer(get_number()); case 'I': - return get_number(); + return sax->number_integer(get_number()); case 'l': - return get_number(); + return sax->number_integer(get_number()); case 'L': - return get_number(); + return sax->number_integer(get_number()); case 'd': - return get_number(); + return sax->number_float(static_cast(get_number()), ""); case 'D': - return get_number(); + return sax->number_float(get_number(), ""); case 'C': // char { @@ -6491,17 +6516,17 @@ class binary_reader 
JSON_THROW(parse_error::create(113, chars_read, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + ss.str())); } - return string_t(1, static_cast(current)); + return sax->string(string_t(1, static_cast(current))); } case 'S': // string - return get_ubjson_string(); + return sax->string(get_ubjson_string()); case '[': // array - return get_ubjson_array(); + return get_ubjson_sax_array(sax); case '{': // object - return get_ubjson_object(); + return get_ubjson_sax_object(sax); default: // anything else std::stringstream ss; @@ -6511,98 +6536,123 @@ class binary_reader } } - BasicJsonType get_ubjson_array() + bool get_ubjson_sax_array(json_sax_t* sax) { - BasicJsonType result = value_t::array; const auto size_and_type = get_ubjson_size_type(); if (size_and_type.first != string_t::npos) { - if (JSON_UNLIKELY(size_and_type.first > result.max_size())) + if (not sax->start_array(size_and_type.first)) { - JSON_THROW(out_of_range::create(408, - "excessive array size: " + std::to_string(size_and_type.first))); + return false; } if (size_and_type.second != 0) { if (size_and_type.second != 'N') { - std::generate_n(std::back_inserter(*result.m_value.array), - size_and_type.first, [this, size_and_type]() + for (std::size_t i = 0; i < size_and_type.first; ++i) { - return get_ubjson_value(size_and_type.second); - }); + if (not get_ubjson_sax_value(sax, size_and_type.second)) + { + return false; + } + } } } else { - std::generate_n(std::back_inserter(*result.m_value.array), - size_and_type.first, [this]() + for (std::size_t i = 0; i < size_and_type.first; ++i) { - return parse_ubjson_internal(); - }); + if (not parse_ubjson_sax_internal(sax)) + { + return false; + } + } } } else { + if (not sax->start_array()) + { + return false; + } + while (current != ']') { - result.push_back(parse_ubjson_internal(false)); + if (not parse_ubjson_sax_internal(sax, false)) + { + return false; + } get_ignore_noop(); } } - return result; + return sax->end_array(); } - BasicJsonType 
get_ubjson_object() + bool get_ubjson_sax_object(json_sax_t* sax) { - BasicJsonType result = value_t::object; const auto size_and_type = get_ubjson_size_type(); if (size_and_type.first != string_t::npos) { - if (JSON_UNLIKELY(size_and_type.first > result.max_size())) + if (not sax->start_object(size_and_type.first)) { - JSON_THROW(out_of_range::create(408, - "excessive object size: " + std::to_string(size_and_type.first))); + return false; } if (size_and_type.second != 0) { - std::generate_n(std::inserter(*result.m_value.object, - result.m_value.object->end()), - size_and_type.first, [this, size_and_type]() + for (std::size_t i = 0; i < size_and_type.first; ++i) { - auto key = get_ubjson_string(); - auto val = get_ubjson_value(size_and_type.second); - return std::make_pair(std::move(key), std::move(val)); - }); + if (not sax->key(get_ubjson_string())) + { + return false; + } + if (not get_ubjson_sax_value(sax, size_and_type.second)) + { + return false; + } + } } else { - std::generate_n(std::inserter(*result.m_value.object, - result.m_value.object->end()), - size_and_type.first, [this]() + for (std::size_t i = 0; i < size_and_type.first; ++i) { - auto key = get_ubjson_string(); - auto val = parse_ubjson_internal(); - return std::make_pair(std::move(key), std::move(val)); - }); + if (not sax->key(get_ubjson_string())) + { + return false; + } + if (not parse_ubjson_sax_internal(sax)) + { + return false; + } + } } } else { + if (not sax->start_object()) + { + return false; + } + while (current != '}') { - auto key = get_ubjson_string(false); - result[std::move(key)] = parse_ubjson_internal(); + if (not sax->key(get_ubjson_string(false))) + { + return false; + } + if (not parse_ubjson_sax_internal(sax)) + { + return false; + } get_ignore_noop(); } } - return result; + return sax->end_object(); } /*! 
@@ -10225,7 +10275,7 @@ class basic_json template friend class ::nlohmann::detail::binary_reader; template - friend class ::nlohmann::json_sax_dom_parser; + friend class ::nlohmann::detail::json_sax_dom_parser; /// workaround type for MSVC using basic_json_t = NLOHMANN_BASIC_JSON_TPL; diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index d84148eb..1f6c2271 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -149,7 +149,7 @@ json parser_helper(const std::string& s) CHECK(j_nothrow == j); json j_sax; - nlohmann::json_sax_dom_parser sdp(j_sax); + nlohmann::detail::json_sax_dom_parser sdp(j_sax); json::sax_parse(s, &sdp); CHECK(j_sax == j); From 3d4f6a2940e04bbdba06bc7521b554709a570369 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 11 Mar 2018 22:47:25 +0100 Subject: [PATCH 15/43] :hammer: cleaner exception interface --- .../nlohmann/detail/input/binary_reader.hpp | 17 ++--- include/nlohmann/detail/input/json_sax.hpp | 30 ++++++--- include/nlohmann/detail/input/parser.hpp | 17 ++--- single_include/nlohmann/json.hpp | 65 ++++++++++--------- test/src/unit-class_parser.cpp | 2 +- test/src/unit-deserialization.cpp | 2 +- 6 files changed, 73 insertions(+), 60 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index e7341352..eef3ad7c 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -147,8 +147,7 @@ class binary_reader { // EOF case std::char_traits::eof(): - unexpect_eof(); - return sax->parse_error(chars_read, "", "unexpected end of input"); + return sax->parse_error(chars_read, "", parse_error::create(110, chars_read, "unexpected end of input")); // Integer 0x00..0x17 (0..23) case 0x00: @@ -399,7 +398,7 @@ class binary_reader { std::stringstream ss; ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - 
JSON_THROW(parse_error::create(112, chars_read, "error reading CBOR; last byte: 0x" + ss.str())); + return sax->parse_error(chars_read, ss.str(), parse_error::create(112, chars_read, "error reading CBOR; last byte: 0x" + ss.str())); } } } @@ -410,8 +409,7 @@ class binary_reader { // EOF case std::char_traits::eof(): - unexpect_eof(); - return sax->parse_error(chars_read, "", "unexpected end of input"); + return sax->parse_error(chars_read, "", parse_error::create(110, chars_read, "unexpected end of input")); // positive fixint case 0x00: @@ -712,8 +710,7 @@ class binary_reader { std::stringstream ss; ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(112, chars_read, - "error reading MessagePack; last byte: 0x" + ss.str())); + return sax->parse_error(chars_read, ss.str(), parse_error::create(112, chars_read, "error reading MessagePack; last byte: 0x" + ss.str())); } } } @@ -1189,8 +1186,7 @@ class binary_reader switch (prefix) { case std::char_traits::eof(): // EOF - unexpect_eof(); - return sax->parse_error(chars_read, "", "unexpected end of input"); + return sax->parse_error(chars_read, "", parse_error::create(110, chars_read, "unexpected end of input")); case 'T': // true return sax->boolean(true); @@ -1223,8 +1219,7 @@ class binary_reader { std::stringstream ss; ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(113, chars_read, - "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + ss.str())); + return sax->parse_error(chars_read, ss.str(), parse_error::create(113, chars_read, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + ss.str())); } return sax->string(string_t(1, static_cast(current))); } diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index bb98e785..b354a842 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ 
b/include/nlohmann/detail/input/json_sax.hpp @@ -4,6 +4,8 @@ #include #include +#include + namespace nlohmann { @@ -118,7 +120,7 @@ struct json_sax */ virtual bool parse_error(std::size_t position, const std::string& last_token, - const std::string& error_msg) = 0; + const detail::exception& ex) = 0; virtual ~json_sax() = default; }; @@ -224,19 +226,27 @@ class json_sax_dom_parser : public json_sax return true; } - bool parse_error(std::size_t position, const std::string& token, - const std::string& error_msg) override + bool parse_error(std::size_t, const std::string&, + const detail::exception& ex) override { errored = true; if (allow_exceptions) { - if (error_msg == "number overflow") + // determine the proper exception type from the id + switch ((ex.id / 100) % 100) { - JSON_THROW(BasicJsonType::out_of_range::create(406, "number overflow parsing '" + token + "'")); - } - else - { - JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg)); + case 1: + JSON_THROW(*reinterpret_cast(&ex)); + case 2: + JSON_THROW(*reinterpret_cast(&ex)); + case 3: + JSON_THROW(*reinterpret_cast(&ex)); + case 4: + JSON_THROW(*reinterpret_cast(&ex)); + case 5: + JSON_THROW(*reinterpret_cast(&ex)); + default: + assert(false); // LCOV_EXCL_LINE } } return false; @@ -359,7 +369,7 @@ class json_sax_acceptor : public json_sax return true; } - bool parse_error(std::size_t, const std::string&, const std::string&) override + bool parse_error(std::size_t, const std::string&, const detail::exception&) override { return false; } diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index 32f2be88..847538d6 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -117,7 +117,7 @@ class parser { sdp.parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - exception_message(token_type::end_of_input)); + parse_error::create(101, m_lexer.get_position(), 
exception_message(token_type::end_of_input))); } // in case of an error, return discarded value @@ -456,7 +456,7 @@ class parser { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - exception_message(token_type::value_string)); + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string))); } else { @@ -472,7 +472,7 @@ class parser { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - exception_message(token_type::name_separator)); + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator))); } // parse value @@ -499,7 +499,8 @@ class parser { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - exception_message(token_type::end_object)); + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object))); + } } } @@ -546,7 +547,7 @@ class parser { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - exception_message(token_type::end_array)); + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array))); } } } @@ -559,7 +560,7 @@ class parser { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - "number overflow"); + out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'")); } else { @@ -602,14 +603,14 @@ class parser // using "uninitialized" to avoid "expected" message return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - exception_message(token_type::uninitialized)); + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized))); } default: // the last token was unexpected { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - exception_message(token_type::literal_or_value)); + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value))); } } } diff --git 
a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index bda79b26..73f9ee6c 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3138,6 +3138,9 @@ scan_number_done: #include #include +// #include + + namespace nlohmann { @@ -3252,7 +3255,7 @@ struct json_sax */ virtual bool parse_error(std::size_t position, const std::string& last_token, - const std::string& error_msg) = 0; + const detail::exception& ex) = 0; virtual ~json_sax() = default; }; @@ -3358,19 +3361,27 @@ class json_sax_dom_parser : public json_sax return true; } - bool parse_error(std::size_t position, const std::string& token, - const std::string& error_msg) override + bool parse_error(std::size_t, const std::string&, + const detail::exception& ex) override { errored = true; if (allow_exceptions) { - if (error_msg == "number overflow") + // determine the proper exception type from the id + switch ((ex.id / 100) % 100) { - JSON_THROW(BasicJsonType::out_of_range::create(406, "number overflow parsing '" + token + "'")); - } - else - { - JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg)); + case 1: + JSON_THROW(*reinterpret_cast(&ex)); + case 2: + JSON_THROW(*reinterpret_cast(&ex)); + case 3: + JSON_THROW(*reinterpret_cast(&ex)); + case 4: + JSON_THROW(*reinterpret_cast(&ex)); + case 5: + JSON_THROW(*reinterpret_cast(&ex)); + default: + assert(false); // LCOV_EXCL_LINE } } return false; @@ -3493,7 +3504,7 @@ class json_sax_acceptor : public json_sax return true; } - bool parse_error(std::size_t, const std::string&, const std::string&) override + bool parse_error(std::size_t, const std::string&, const detail::exception&) override { return false; } @@ -3611,7 +3622,7 @@ class parser { sdp.parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - exception_message(token_type::end_of_input)); + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input))); } // in case of an error, return 
discarded value @@ -3950,7 +3961,7 @@ class parser { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - exception_message(token_type::value_string)); + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string))); } else { @@ -3966,7 +3977,7 @@ class parser { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - exception_message(token_type::name_separator)); + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator))); } // parse value @@ -3993,7 +4004,8 @@ class parser { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - exception_message(token_type::end_object)); + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object))); + } } } @@ -4040,7 +4052,7 @@ class parser { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - exception_message(token_type::end_array)); + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array))); } } } @@ -4053,7 +4065,7 @@ class parser { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - "number overflow"); + out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'")); } else { @@ -4096,14 +4108,14 @@ class parser // using "uninitialized" to avoid "expected" message return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - exception_message(token_type::uninitialized)); + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized))); } default: // the last token was unexpected { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - exception_message(token_type::literal_or_value)); + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value))); } } } @@ -5437,8 +5449,7 @@ class binary_reader { // EOF case 
std::char_traits::eof(): - unexpect_eof(); - return sax->parse_error(chars_read, "", "unexpected end of input"); + return sax->parse_error(chars_read, "", parse_error::create(110, chars_read, "unexpected end of input")); // Integer 0x00..0x17 (0..23) case 0x00: @@ -5689,7 +5700,7 @@ class binary_reader { std::stringstream ss; ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(112, chars_read, "error reading CBOR; last byte: 0x" + ss.str())); + return sax->parse_error(chars_read, ss.str(), parse_error::create(112, chars_read, "error reading CBOR; last byte: 0x" + ss.str())); } } } @@ -5700,8 +5711,7 @@ class binary_reader { // EOF case std::char_traits::eof(): - unexpect_eof(); - return sax->parse_error(chars_read, "", "unexpected end of input"); + return sax->parse_error(chars_read, "", parse_error::create(110, chars_read, "unexpected end of input")); // positive fixint case 0x00: @@ -6002,8 +6012,7 @@ class binary_reader { std::stringstream ss; ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(112, chars_read, - "error reading MessagePack; last byte: 0x" + ss.str())); + return sax->parse_error(chars_read, ss.str(), parse_error::create(112, chars_read, "error reading MessagePack; last byte: 0x" + ss.str())); } } } @@ -6479,8 +6488,7 @@ class binary_reader switch (prefix) { case std::char_traits::eof(): // EOF - unexpect_eof(); - return sax->parse_error(chars_read, "", "unexpected end of input"); + return sax->parse_error(chars_read, "", parse_error::create(110, chars_read, "unexpected end of input")); case 'T': // true return sax->boolean(true); @@ -6513,8 +6521,7 @@ class binary_reader { std::stringstream ss; ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(113, chars_read, - "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + ss.str())); + return 
sax->parse_error(chars_read, ss.str(), parse_error::create(113, chars_read, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + ss.str())); } return sax->string(string_t(1, static_cast(current))); } diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index 1f6c2271..fec21001 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -123,7 +123,7 @@ class SaxEventLogger : public nlohmann::json::json_sax_t return true; } - bool parse_error(std::size_t position, const std::string&, const std::string&) override + bool parse_error(std::size_t position, const std::string&, const json::exception&) override { errored = true; events.push_back("parse_error(" + std::to_string(position) + ")"); diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index cd87dfdb..371df88c 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -122,7 +122,7 @@ struct SaxEventLogger : public nlohmann::json::json_sax_t return true; } - bool parse_error(std::size_t position, const std::string&, const std::string&) override + bool parse_error(std::size_t position, const std::string&, const json::exception&) override { events.push_back("parse_error(" + std::to_string(position) + ")"); return false; From 282bafae4f2dd8a3399897e5657984121fedc7f5 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 14 Mar 2018 07:18:42 +0100 Subject: [PATCH 16/43] :hammer: fixed compilation error --- include/nlohmann/detail/input/json_sax.hpp | 22 +++++++++++++--------- single_include/nlohmann/json.hpp | 22 +++++++++++++--------- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index b354a842..28dfe6a4 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -21,6 +21,8 @@ struct json_sax using number_unsigned_t = typename 
BasicJsonType::number_unsigned_t; /// type for floating-point numbers using number_float_t = typename BasicJsonType::number_float_t; + /// type for strings + using string_t = typename BasicJsonType::string_t; /// constant to indicate that no size limit is given for array or object static constexpr auto no_limit = std::size_t(-1); @@ -58,14 +60,14 @@ struct json_sax @param[in] s raw token value @return whether parsing should proceed */ - virtual bool number_float(number_float_t val, const std::string& s) = 0; + virtual bool number_float(number_float_t val, const string_t& s) = 0; /*! @brief a string was read @param[in] val string value @return whether parsing should proceed */ - virtual bool string(std::string&& val) = 0; + virtual bool string(string_t&& val) = 0; /*! @brief the beginning of an object was read @@ -80,7 +82,7 @@ struct json_sax @param[in] val object key @return whether parsing should proceed */ - virtual bool key(std::string&& val) = 0; + virtual bool key(string_t&& val) = 0; /*! 
@brief the end of an object was read @@ -135,6 +137,7 @@ class json_sax_dom_parser : public json_sax using number_integer_t = typename BasicJsonType::number_integer_t; using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) : root(r), allow_exceptions(allow_exceptions_) @@ -164,13 +167,13 @@ class json_sax_dom_parser : public json_sax return true; } - bool number_float(number_float_t val, const std::string&) override + bool number_float(number_float_t val, const string_t&) override { handle_value(val); return true; } - bool string(std::string&& val) override + bool string(string_t&& val) override { handle_value(val); return true; @@ -189,7 +192,7 @@ class json_sax_dom_parser : public json_sax return true; } - bool key(std::string&& val) override + bool key(string_t&& val) override { // add null at given key and store the reference for later object_element = &(ref_stack.back()->m_value.object->operator[](val)); @@ -308,6 +311,7 @@ class json_sax_acceptor : public json_sax using number_integer_t = typename BasicJsonType::number_integer_t; using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; bool null() override { @@ -329,12 +333,12 @@ class json_sax_acceptor : public json_sax return true; } - bool number_float(number_float_t, const std::string&) override + bool number_float(number_float_t, const string_t&) override { return true; } - bool string(std::string&&) override + bool string(string_t&&) override { return true; } @@ -344,7 +348,7 @@ class json_sax_acceptor : public json_sax return true; } - bool key(std::string&&) override + bool key(string_t&&) override { return true; } diff --git a/single_include/nlohmann/json.hpp 
b/single_include/nlohmann/json.hpp index 7a35db73..bb25aa87 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3157,6 +3157,8 @@ struct json_sax using number_unsigned_t = typename BasicJsonType::number_unsigned_t; /// type for floating-point numbers using number_float_t = typename BasicJsonType::number_float_t; + /// type for strings + using string_t = typename BasicJsonType::string_t; /// constant to indicate that no size limit is given for array or object static constexpr auto no_limit = std::size_t(-1); @@ -3194,14 +3196,14 @@ struct json_sax @param[in] s raw token value @return whether parsing should proceed */ - virtual bool number_float(number_float_t val, const std::string& s) = 0; + virtual bool number_float(number_float_t val, const string_t& s) = 0; /*! @brief a string was read @param[in] val string value @return whether parsing should proceed */ - virtual bool string(std::string&& val) = 0; + virtual bool string(string_t&& val) = 0; /*! @brief the beginning of an object was read @@ -3216,7 +3218,7 @@ struct json_sax @param[in] val object key @return whether parsing should proceed */ - virtual bool key(std::string&& val) = 0; + virtual bool key(string_t&& val) = 0; /*! 
@brief the end of an object was read @@ -3271,6 +3273,7 @@ class json_sax_dom_parser : public json_sax using number_integer_t = typename BasicJsonType::number_integer_t; using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) : root(r), allow_exceptions(allow_exceptions_) @@ -3300,13 +3303,13 @@ class json_sax_dom_parser : public json_sax return true; } - bool number_float(number_float_t val, const std::string&) override + bool number_float(number_float_t val, const string_t&) override { handle_value(val); return true; } - bool string(std::string&& val) override + bool string(string_t&& val) override { handle_value(val); return true; @@ -3325,7 +3328,7 @@ class json_sax_dom_parser : public json_sax return true; } - bool key(std::string&& val) override + bool key(string_t&& val) override { // add null at given key and store the reference for later object_element = &(ref_stack.back()->m_value.object->operator[](val)); @@ -3444,6 +3447,7 @@ class json_sax_acceptor : public json_sax using number_integer_t = typename BasicJsonType::number_integer_t; using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; bool null() override { @@ -3465,12 +3469,12 @@ class json_sax_acceptor : public json_sax return true; } - bool number_float(number_float_t, const std::string&) override + bool number_float(number_float_t, const string_t&) override { return true; } - bool string(std::string&&) override + bool string(string_t&&) override { return true; } @@ -3480,7 +3484,7 @@ class json_sax_acceptor : public json_sax return true; } - bool key(std::string&&) override + bool key(string_t&&) override { return true; } From 1262d474ebbd3ce8d2581d16da392a4896dc58d2 
Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 14 Mar 2018 07:32:28 +0100 Subject: [PATCH 17/43] :checkered_flag: fixed an MSVC warning --- test/src/unit-ubjson.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/src/unit-ubjson.cpp b/test/src/unit-ubjson.cpp index 88b72fc8..db0a8520 100644 --- a/test/src/unit-ubjson.cpp +++ b/test/src/unit-ubjson.cpp @@ -152,7 +152,7 @@ TEST_CASE("UBJSON") numbers.push_back(-10000000); numbers.push_back(-100000000); numbers.push_back(-1000000000); - numbers.push_back(-2147483648L); + numbers.push_back(-2147483647 - 1); // https://stackoverflow.com/a/29356002/266378 for (auto i : numbers) { CAPTURE(i); From 829ed74d66ae58fc203c3c7cb5d3348705c18c75 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 14 Mar 2018 07:38:22 +0100 Subject: [PATCH 18/43] :checkered_flag: experimenting with /Wall --- appveyor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index da703786..828c3223 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -6,8 +6,8 @@ os: environment: matrix: - - additional_flags: "" - - additional_flags: "/permissive- /std:c++latest /utf-8" + - additional_flags: "/Wall" + - additional_flags: "/permissive- /std:c++latest /utf-8 /Wall" matrix: exclude: From 21410d50afc5d801bd149fa995a32d68cce50767 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 14 Mar 2018 17:21:31 +0100 Subject: [PATCH 19/43] :checkered_flag: moved /Wall to CMake --- appveyor.yml | 4 ++-- test/CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 828c3223..da703786 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -6,8 +6,8 @@ os: environment: matrix: - - additional_flags: "/Wall" - - additional_flags: "/permissive- /std:c++latest /utf-8 /Wall" + - additional_flags: "" + - additional_flags: "/permissive- /std:c++latest /utf-8" matrix: exclude: diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 
e5f6dc55..6d921e99 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -82,7 +82,7 @@ if(MSVC) # Disable warning C4309: 'static_cast': truncation of constant value # Disable warning C4566: character represented by universal-character-name '\uFF01' cannot be represented in the current code page (1252) # Disable warning C4996: 'nlohmann::basic_json::operator <<': was declared deprecated - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4389 /wd4309 /wd4566 /wd4996") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4389 /wd4309 /wd4566 /wd4996 /Wall") endif() ############################################################################# From a52e8355b8c47c8b6c57c59b3afe9ab44b279093 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 14 Mar 2018 17:32:48 +0100 Subject: [PATCH 20/43] :rewind: oops --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6d921e99..e5f6dc55 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -82,7 +82,7 @@ if(MSVC) # Disable warning C4309: 'static_cast': truncation of constant value # Disable warning C4566: character represented by universal-character-name '\uFF01' cannot be represented in the current code page (1252) # Disable warning C4996: 'nlohmann::basic_json::operator <<': was declared deprecated - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4389 /wd4309 /wd4566 /wd4996 /Wall") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4389 /wd4309 /wd4566 /wd4996") endif() ############################################################################# From c87ffad45c75a2c74fe1a9a64f965d8e40c4c6d9 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sat, 17 Mar 2018 14:46:50 +0100 Subject: [PATCH 21/43] :recycle: implemented a non-recursive parser --- include/nlohmann/detail/input/parser.hpp | 412 +++++++++++++++-------- single_include/nlohmann/json.hpp | 412 ++++++++++++++--------- 2 files changed, 526 insertions(+), 298 deletions(-) diff --git 
a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index 5559692d..83bd75f1 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -431,188 +431,304 @@ class parser bool sax_parse_internal(json_sax_t* sax) { - switch (last_token) + // two values for the structured values + enum class parse_state_t { array_value, object_value }; + // stack to remember the hieararchy of structured values we are parsing + std::vector states; + // value to avoid a goto (see comment where set to true) + bool skip_to_state_evaluation = false; + + while (true) { - case token_type::begin_object: + if (not skip_to_state_evaluation) { - if (not sax->start_object()) + // invariant: get_token() was called before each iteration + switch (last_token) { - return false; - } - - // read next token - get_token(); - - // closing } -> we are done - if (JSON_UNLIKELY(last_token == token_type::end_object)) - { - return sax->end_object(); - } - - // parse values - while (true) - { - // parse key - if (JSON_UNLIKELY(last_token != token_type::value_string)) + case token_type::begin_object: { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string))); - } - else - { - if (not sax->key(m_lexer.move_string())) + if (not sax->start_object()) { return false; } - } - // parse separator (:) - get_token(); - if (JSON_UNLIKELY(last_token != token_type::name_separator)) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator))); - } + // read next token + get_token(); - // parse value - get_token(); - if (not sax_parse_internal(sax)) - { - return false; - } + // closing } -> we are done + if (last_token == token_type::end_object) + { + if (not sax->end_object()) + { + return false; + } + break; + 
} - // comma -> next value - get_token(); - if (last_token == token_type::value_separator) - { + // parse key + if (JSON_UNLIKELY(last_token != token_type::value_string)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string))); + } + else + { + if (not sax->key(m_lexer.move_string())) + { + return false; + } + } + + // parse separator (:) + get_token(); + if (JSON_UNLIKELY(last_token != token_type::name_separator)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator))); + } + + // remember we are now inside an object + states.push_back(parse_state_t::object_value); + + // parse values get_token(); continue; } - // closing } - if (JSON_LIKELY(last_token == token_type::end_object)) + case token_type::begin_array: { - return sax->end_object(); + if (not sax->start_array()) + { + return false; + } + + // read next token + get_token(); + + // closing ] -> we are done + if (last_token == token_type::end_array) + { + if (not sax->end_array()) + { + return false; + } + break; + } + + // remember we are now inside an array + states.push_back(parse_state_t::array_value); + + // parse values (no need to call get_token) + continue; } - else + + case token_type::value_float: + { + const auto res = m_lexer.get_number_float(); + + if (JSON_UNLIKELY(not std::isfinite(res))) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'")); + } + else + { + if (not sax->number_float(res, m_lexer.move_string())) + { + return false; + } + break; + } + } + + case token_type::literal_false: + { + if (not sax->boolean(false)) + { + return false; + } + break; + } + + case token_type::literal_null: + { + if (not sax->null()) + { + 
return false; + } + break; + } + + case token_type::literal_true: + { + if (not sax->boolean(true)) + { + return false; + } + break; + } + + case token_type::value_integer: + { + if (not sax->number_integer(m_lexer.get_number_integer())) + { + return false; + } + break; + } + + case token_type::value_string: + { + if (not sax->string(m_lexer.move_string())) + { + return false; + } + break; + } + + case token_type::value_unsigned: + { + if (not sax->number_unsigned(m_lexer.get_number_unsigned())) + { + return false; + } + break; + } + + case token_type::parse_error: + { + // using "uninitialized" to avoid "expected" message + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized))); + } + + default: // the last token was unexpected { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object))); - + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value))); } } } - - case token_type::begin_array: + else { - if (not sax->start_array()) - { - return false; - } + skip_to_state_evaluation = false; + } - // read next token + // we reached this line after we successfully parsed a value + if (states.empty()) + { + // empty stack: we reached the end of the hieararchy: done + return true; + } + else + { get_token(); - - // closing ] -> we are done - if (last_token == token_type::end_array) + switch (states.back()) { - return sax->end_array(); - } - - // parse values - while (true) - { - // parse value - if (not sax_parse_internal(sax)) + case parse_state_t::array_value: { - return false; + // comma -> next value + if (last_token == token_type::value_separator) + { + // parse a new value + get_token(); + continue; + } + + // closing ] + if (JSON_LIKELY(last_token == token_type::end_array)) + { + if (not 
sax->end_array()) + { + return false; + } + + // We are done with this array. Before we can parse + // a new value, we need to evaluate the new state + // first. By setting skip_to_state_evaluation to + // false, we are effectively jumping to the + // beginning of this switch. + assert(not states.empty()); + states.pop_back(); + skip_to_state_evaluation = true; + continue; + } + else + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array))); + } } - // comma -> next value - get_token(); - if (last_token == token_type::value_separator) + case parse_state_t::object_value: { - get_token(); - continue; - } + // comma -> next value + if (last_token == token_type::value_separator) + { + get_token(); - // closing ] - if (JSON_LIKELY(last_token == token_type::end_array)) - { - return sax->end_array(); - } - else - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array))); + // parse key + if (JSON_UNLIKELY(last_token != token_type::value_string)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string))); + } + else + { + if (not sax->key(m_lexer.move_string())) + { + return false; + } + } + + // parse separator (:) + get_token(); + if (JSON_UNLIKELY(last_token != token_type::name_separator)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator))); + } + + // parse values + get_token(); + continue; + } + + // closing } + if (JSON_LIKELY(last_token == token_type::end_object)) + { + if (not sax->end_object()) + { + return false; + } + + // We are done with this object. 
Before we can + // parse a new value, we need to evaluate the new + // state first. By setting skip_to_state_evaluation + // to false, we are effectively jumping to the + // beginning of this switch. + assert(not states.empty()); + states.pop_back(); + skip_to_state_evaluation = true; + continue; + } + else + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object))); + } } } } - - case token_type::value_float: - { - const auto res = m_lexer.get_number_float(); - - if (JSON_UNLIKELY(not std::isfinite(res))) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'")); - } - else - { - return sax->number_float(res, m_lexer.move_string()); - } - } - - case token_type::literal_false: - { - return sax->boolean(false); - } - - case token_type::literal_null: - { - return sax->null(); - } - - case token_type::literal_true: - { - return sax->boolean(true); - } - - case token_type::value_integer: - { - return sax->number_integer(m_lexer.get_number_integer()); - } - - case token_type::value_string: - { - return sax->string(m_lexer.move_string()); - } - - case token_type::value_unsigned: - { - return sax->number_unsigned(m_lexer.get_number_unsigned()); - } - - case token_type::parse_error: - { - // using "uninitialized" to avoid "expected" message - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized))); - } - - default: // the last token was unexpected - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value))); - } } } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp 
index cdc9d01f..a588e4f0 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3941,188 +3941,300 @@ class parser bool sax_parse_internal(json_sax_t* sax) { - switch (last_token) + // two values for the structured values + enum class parse_state_t { array_value, object_value }; + // stack to remember the hieararchy of structured values we are parsing + std::vector states; + // value to avoid a goto (see comment where set to true) + bool skip_to_tail = false; + + while (true) { - case token_type::begin_object: + if (not skip_to_tail) { - if (not sax->start_object()) + // invariant: get_token() was called before each iteration + switch (last_token) { - return false; - } - - // read next token - get_token(); - - // closing } -> we are done - if (JSON_UNLIKELY(last_token == token_type::end_object)) - { - return sax->end_object(); - } - - // parse values - while (true) - { - // parse key - if (JSON_UNLIKELY(last_token != token_type::value_string)) + case token_type::begin_object: { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string))); - } - else - { - if (not sax->key(m_lexer.move_string())) + if (not sax->start_object()) { return false; } - } - // parse separator (:) - get_token(); - if (JSON_UNLIKELY(last_token != token_type::name_separator)) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator))); - } - - // parse value - get_token(); - if (not sax_parse_internal(sax)) - { - return false; - } - - // comma -> next value - get_token(); - if (last_token == token_type::value_separator) - { + // read next token get_token(); + + // closing } -> we are done + if (last_token == token_type::end_object) + { + if (not sax->end_object()) + { + return false; + } + break; + } + + // parse key + if 
(JSON_UNLIKELY(last_token != token_type::value_string)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string))); + } + else + { + if (not sax->key(m_lexer.move_string())) + { + return false; + } + } + + // parse separator (:) + get_token(); + if (JSON_UNLIKELY(last_token != token_type::name_separator)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator))); + } + + // parse values + get_token(); + states.push_back(parse_state_t::object_value); continue; } - // closing } - if (JSON_LIKELY(last_token == token_type::end_object)) + case token_type::begin_array: { - return sax->end_object(); + if (not sax->start_array()) + { + return false; + } + + // read next token + get_token(); + + // closing ] -> we are done + if (last_token == token_type::end_array) + { + if (not sax->end_array()) + { + return false; + } + break; + } + + // parse values (no need to call get_token) + states.push_back(parse_state_t::array_value); + continue; } - else + + case token_type::value_float: + { + const auto res = m_lexer.get_number_float(); + + if (JSON_UNLIKELY(not std::isfinite(res))) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'")); + } + else + { + if (not sax->number_float(res, m_lexer.move_string())) + { + return false; + } + break; + } + } + + case token_type::literal_false: + { + if (not sax->boolean(false)) + { + return false; + } + break; + } + + case token_type::literal_null: + { + if (not sax->null()) + { + return false; + } + break; + } + + case token_type::literal_true: + { + if (not sax->boolean(true)) + { + return false; + } + break; + } + + case token_type::value_integer: + { + if (not 
sax->number_integer(m_lexer.get_number_integer())) + { + return false; + } + break; + } + + case token_type::value_string: + { + if (not sax->string(m_lexer.move_string())) + { + return false; + } + break; + } + + case token_type::value_unsigned: + { + if (not sax->number_unsigned(m_lexer.get_number_unsigned())) + { + return false; + } + break; + } + + case token_type::parse_error: + { + // using "uninitialized" to avoid "expected" message + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized))); + } + + default: // the last token was unexpected { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object))); - + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value))); } } } - - case token_type::begin_array: + else { - if (not sax->start_array()) - { - return false; - } + skip_to_tail = false; + } - // read next token + // we reached this line after we successfully parsed a value + if (states.empty()) + { + // empty stack: we reached the end of the hieararchy: done + return true; + } + else + { get_token(); - - // closing ] -> we are done - if (last_token == token_type::end_array) + switch (states.back()) { - return sax->end_array(); - } - - // parse values - while (true) - { - // parse value - if (not sax_parse_internal(sax)) + case parse_state_t::array_value: { - return false; + // comma -> next value + if (last_token == token_type::value_separator) + { + // parse a new value + get_token(); + continue; + } + + // closing ] + if (JSON_LIKELY(last_token == token_type::end_array)) + { + if (not sax->end_array()) + { + return false; + } + + // We are done with this array. Before we can parse + // a new value, we need to evaluate the new state + // first. 
By setting skip_to_tail to false, we are + // effectively jumping to the beginning of this + // switch. + assert(not states.empty()); + states.pop_back(); + skip_to_tail = true; + continue; + } + else + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array))); + } } - // comma -> next value - get_token(); - if (last_token == token_type::value_separator) + case parse_state_t::object_value: { - get_token(); - continue; - } + // comma -> next value + if (last_token == token_type::value_separator) + { + get_token(); - // closing ] - if (JSON_LIKELY(last_token == token_type::end_array)) - { - return sax->end_array(); - } - else - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array))); + // parse key + if (JSON_UNLIKELY(last_token != token_type::value_string)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string))); + } + else + { + if (not sax->key(m_lexer.move_string())) + { + return false; + } + } + + // parse separator (:) + get_token(); + if (JSON_UNLIKELY(last_token != token_type::name_separator)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator))); + } + + // parse values + get_token(); + continue; + } + + // closing } + if (JSON_LIKELY(last_token == token_type::end_object)) + { + if (not sax->end_object()) + { + return false; + } + + // We are done with this object. Before we can + // parse a new value, we need to evaluate the new + // state first. By setting skip_to_tail to false, + // we are effectively jumping to the beginning of + // this switch. 
+ assert(not states.empty()); + states.pop_back(); + skip_to_tail = true; + continue; + } + else + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object))); + } } } } - - case token_type::value_float: - { - const auto res = m_lexer.get_number_float(); - - if (JSON_UNLIKELY(not std::isfinite(res))) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'")); - } - else - { - return sax->number_float(res, m_lexer.move_string()); - } - } - - case token_type::literal_false: - { - return sax->boolean(false); - } - - case token_type::literal_null: - { - return sax->null(); - } - - case token_type::literal_true: - { - return sax->boolean(true); - } - - case token_type::value_integer: - { - return sax->number_integer(m_lexer.get_number_integer()); - } - - case token_type::value_string: - { - return sax->string(m_lexer.move_string()); - } - - case token_type::value_unsigned: - { - return sax->number_unsigned(m_lexer.get_number_unsigned()); - } - - case token_type::parse_error: - { - // using "uninitialized" to avoid "expected" message - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized))); - } - - default: // the last token was unexpected - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value))); - } } } From 606a25195fcb863a7fae057dd2262bdc9f8ac877 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sat, 17 Mar 2018 19:15:59 +0100 Subject: [PATCH 22/43] :white_check_mark: improved test coverage --- include/nlohmann/detail/input/json_sax.hpp | 6 +- single_include/nlohmann/json.hpp | 36 +++-- 
test/src/unit-class_parser.cpp | 171 +++++++++++++++++++++ 3 files changed, 194 insertions(+), 19 deletions(-) diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index 28dfe6a4..a0ffdc66 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -241,13 +241,13 @@ class json_sax_dom_parser : public json_sax case 1: JSON_THROW(*reinterpret_cast(&ex)); case 2: - JSON_THROW(*reinterpret_cast(&ex)); + JSON_THROW(*reinterpret_cast(&ex)); // LCOV_EXCL_LINE case 3: - JSON_THROW(*reinterpret_cast(&ex)); + JSON_THROW(*reinterpret_cast(&ex)); // LCOV_EXCL_LINE case 4: JSON_THROW(*reinterpret_cast(&ex)); case 5: - JSON_THROW(*reinterpret_cast(&ex)); + JSON_THROW(*reinterpret_cast(&ex)); // LCOV_EXCL_LINE default: assert(false); // LCOV_EXCL_LINE } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index a588e4f0..639d6400 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3377,13 +3377,13 @@ class json_sax_dom_parser : public json_sax case 1: JSON_THROW(*reinterpret_cast(&ex)); case 2: - JSON_THROW(*reinterpret_cast(&ex)); + JSON_THROW(*reinterpret_cast(&ex)); // LCOV_EXCL_LINE case 3: - JSON_THROW(*reinterpret_cast(&ex)); + JSON_THROW(*reinterpret_cast(&ex)); // LCOV_EXCL_LINE case 4: JSON_THROW(*reinterpret_cast(&ex)); case 5: - JSON_THROW(*reinterpret_cast(&ex)); + JSON_THROW(*reinterpret_cast(&ex)); // LCOV_EXCL_LINE default: assert(false); // LCOV_EXCL_LINE } @@ -3946,11 +3946,11 @@ class parser // stack to remember the hieararchy of structured values we are parsing std::vector states; // value to avoid a goto (see comment where set to true) - bool skip_to_tail = false; + bool skip_to_state_evaluation = false; while (true) { - if (not skip_to_tail) + if (not skip_to_state_evaluation) { // invariant: get_token() was called before each iteration switch (last_token) @@ -3999,9 +3999,11 @@ class parser 
parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator))); } + // remember we are now inside an object + states.push_back(parse_state_t::object_value); + // parse values get_token(); - states.push_back(parse_state_t::object_value); continue; } @@ -4025,8 +4027,10 @@ class parser break; } - // parse values (no need to call get_token) + // remember we are now inside an array states.push_back(parse_state_t::array_value); + + // parse values (no need to call get_token) continue; } @@ -4122,7 +4126,7 @@ class parser } else { - skip_to_tail = false; + skip_to_state_evaluation = false; } // we reached this line after we successfully parsed a value @@ -4156,12 +4160,12 @@ class parser // We are done with this array. Before we can parse // a new value, we need to evaluate the new state - // first. By setting skip_to_tail to false, we are - // effectively jumping to the beginning of this - // switch. + // first. By setting skip_to_state_evaluation to + // false, we are effectively jumping to the + // beginning of this switch. assert(not states.empty()); states.pop_back(); - skip_to_tail = true; + skip_to_state_evaluation = true; continue; } else @@ -4218,12 +4222,12 @@ class parser // We are done with this object. Before we can // parse a new value, we need to evaluate the new - // state first. By setting skip_to_tail to false, - // we are effectively jumping to the beginning of - // this switch. + // state first. By setting skip_to_state_evaluation + // to false, we are effectively jumping to the + // beginning of this switch. 
assert(not states.empty()); states.pop_back(); - skip_to_tail = true; + skip_to_state_evaluation = true; continue; } else diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index e37f398a..c425207a 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -134,6 +134,81 @@ class SaxEventLogger : public nlohmann::json::json_sax_t bool errored = false; }; +class SaxCountdown : public nlohmann::json::json_sax_t +{ + public: + explicit SaxCountdown(const int count) : events_left(count) + {} + + bool null() override + { + return events_left-- > 0; + } + + bool boolean(bool) override + { + return events_left-- > 0; + } + + bool number_integer(json::number_integer_t) override + { + return events_left-- > 0; + } + + bool number_unsigned(json::number_unsigned_t) override + { + return events_left-- > 0; + } + + bool number_float(json::number_float_t, const std::string&) override + { + return events_left-- > 0; + } + + bool string(std::string&&) override + { + return events_left-- > 0; + } + + bool start_object(std::size_t) override + { + return events_left-- > 0; + } + + bool key(std::string&&) override + { + return events_left-- > 0; + } + + bool end_object() override + { + return events_left-- > 0; + } + + bool start_array(std::size_t) override + { + return events_left-- > 0; + } + + bool end_array() override + { + return events_left-- > 0; + } + + bool binary(const std::vector&) override + { + return events_left-- > 0; + } + + bool parse_error(std::size_t, const std::string&, const json::exception&) override + { + return false; + } + + private: + int events_left = 0; +}; + json parser_helper(const std::string& s); bool accept_helper(const std::string& s); @@ -1595,4 +1670,100 @@ TEST_CASE("parser class") CHECK(j == json(true)); } } + + SECTION("improve test coverage") + { + SECTION("parser with callback") + { + json::parser_callback_t cb = [](int, json::parse_event_t, json&) + { + return true; + }; + + 
CHECK(json::parse("{\"foo\": true:", cb, false).is_discarded()); + + CHECK_THROWS_AS(json::parse("{\"foo\": true:", cb), json::parse_error&); + CHECK_THROWS_WITH(json::parse("{\"foo\": true:", cb), + "[json.exception.parse_error.101] parse error at 13: syntax error - unexpected ':'; expected '}'"); + + CHECK_THROWS_AS(json::parse("1.18973e+4932", cb), json::out_of_range&); + CHECK_THROWS_WITH(json::parse("1.18973e+4932", cb), + "[json.exception.out_of_range.406] number overflow parsing '1.18973e+4932'"); + } + + SECTION("SAX parser") + { + SECTION("} without value") + { + SaxCountdown s(1); + CHECK(json::sax_parse("{}", &s) == false); + } + + SECTION("} with value") + { + SaxCountdown s(3); + CHECK(json::sax_parse("{\"k1\": true}", &s) == false); + } + + SECTION("second key") + { + SaxCountdown s(3); + CHECK(json::sax_parse("{\"k1\": true, \"k2\": false}", &s) == false); + } + + SECTION("] without value") + { + SaxCountdown s(1); + CHECK(json::sax_parse("[]", &s) == false); + } + + SECTION("] with value") + { + SaxCountdown s(2); + CHECK(json::sax_parse("[1]", &s) == false); + } + + SECTION("float") + { + SaxCountdown s(0); + CHECK(json::sax_parse("3.14", &s) == false); + } + + SECTION("false") + { + SaxCountdown s(0); + CHECK(json::sax_parse("false", &s) == false); + } + + SECTION("null") + { + SaxCountdown s(0); + CHECK(json::sax_parse("null", &s) == false); + } + + SECTION("true") + { + SaxCountdown s(0); + CHECK(json::sax_parse("true", &s) == false); + } + + SECTION("unsigned") + { + SaxCountdown s(0); + CHECK(json::sax_parse("12", &s) == false); + } + + SECTION("integer") + { + SaxCountdown s(0); + CHECK(json::sax_parse("-12", &s) == false); + } + + SECTION("string") + { + SaxCountdown s(0); + CHECK(json::sax_parse("\"foo\"", &s) == false); + } + } + } } From 375b05a17d3bc7c93eee9933748e85fc5420923b Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sat, 17 Mar 2018 23:31:18 +0100 Subject: [PATCH 23/43] :hammer: cleanup --- 
include/nlohmann/detail/input/json_sax.hpp | 26 +++++++++++++--------- single_include/nlohmann/json.hpp | 26 +++++++++++++--------- 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index a0ffdc66..8bbaa9c9 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -277,17 +277,23 @@ class json_sax_dom_parser : public json_sax } else { - assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); - if (ref_stack.back()->is_array()) + switch (ref_stack.back()->m_type) { - ref_stack.back()->m_value.array->push_back(BasicJsonType(std::forward(v))); - return &(ref_stack.back()->m_value.array->back()); - } - else - { - assert(object_element); - *object_element = BasicJsonType(std::forward(v)); - return object_element; + case value_t::array: + { + ref_stack.back()->m_value.array->push_back(BasicJsonType(std::forward(v))); + return &(ref_stack.back()->m_value.array->back()); + } + + case value_t::object: + { + assert(object_element); + *object_element = BasicJsonType(std::forward(v)); + return object_element; + } + + default: + assert(false); // LCOV_EXCL_LINE } } } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 639d6400..50302903 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3413,17 +3413,23 @@ class json_sax_dom_parser : public json_sax } else { - assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); - if (ref_stack.back()->is_array()) + switch (ref_stack.back()->m_type) { - ref_stack.back()->m_value.array->push_back(BasicJsonType(std::forward(v))); - return &(ref_stack.back()->m_value.array->back()); - } - else - { - assert(object_element); - *object_element = BasicJsonType(std::forward(v)); - return object_element; + case value_t::array: + { + 
ref_stack.back()->m_value.array->push_back(BasicJsonType(std::forward(v))); + return &(ref_stack.back()->m_value.array->back()); + } + + case value_t::object: + { + assert(object_element); + *object_element = BasicJsonType(std::forward(v)); + return object_element; + } + + default: + assert(false); // LCOV_EXCL_LINE } } } From 22929fe18960c1a6d9c6fa18b666a1961cb38e36 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 18 Mar 2018 15:13:53 +0100 Subject: [PATCH 24/43] :construction: started a SAX/DOM/callback parser --- include/nlohmann/detail/input/json_sax.hpp | 216 +++++++++++++++++++ include/nlohmann/detail/input/parser.hpp | 21 ++ include/nlohmann/json.hpp | 2 + single_include/nlohmann/json.hpp | 240 +++++++++++++++++++++ 4 files changed, 479 insertions(+) diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index 8bbaa9c9..e19088a9 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -4,6 +4,7 @@ #include #include +#include #include namespace nlohmann @@ -310,6 +311,221 @@ class json_sax_dom_parser : public json_sax const bool allow_exceptions = true; }; +template +class json_sax_dom_callback_parser : public json_sax +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using parser_callback_t = typename BasicJsonType::parser_callback_t; + using parse_event_t = typename BasicJsonType::parse_event_t; + + json_sax_dom_callback_parser(BasicJsonType& r, + const parser_callback_t cb = nullptr, + const bool allow_exceptions_ = true) + : root(r), callback(cb), allow_exceptions(allow_exceptions_) + { + keep_stack.push_back(true); + } + + bool null() override + { + handle_value(nullptr); + return true; + } + + bool boolean(bool val) override + { + 
handle_value(val); + return true; + } + + bool number_integer(number_integer_t val) override + { + handle_value(val); + return true; + } + + bool number_unsigned(number_unsigned_t val) override + { + handle_value(val); + return true; + } + + bool number_float(number_float_t val, const string_t&) override + { + handle_value(val); + return true; + } + + bool string(string_t&& val) override + { + handle_value(val); + return true; + } + + bool start_object(std::size_t len) override + { + const bool keep = callback(ref_stack.size() + 1, parse_event_t::object_start, discarded); + keep_stack.push_back(keep); + + ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); + + if (JSON_UNLIKELY(len != json_sax::no_limit and len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, + "excessive object size: " + std::to_string(len))); + } + + return true; + } + + bool key(string_t&& val) override + { + BasicJsonType k = BasicJsonType(std::forward < string_t&& > (val)); + const bool keep = callback(ref_stack.size(), parse_event_t::key, k); + + // add null at given key and store the reference for later + object_element = &(ref_stack.back()->m_value.object->operator[](val)); + return true; + } + + bool end_object() override + { + const bool keep = callback(ref_stack.size() - 1, parse_event_t::object_end, *ref_stack.back()); + if (not keep) + { + *ref_stack.back() = discarded; + } + + ref_stack.pop_back(); + keep_stack.pop_back(); + return true; + } + + bool start_array(std::size_t len) override + { + const bool keep = callback(ref_stack.size() + 1, parse_event_t::array_start, discarded); + keep_stack.push_back(keep); + + ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); + + if (JSON_UNLIKELY(len != json_sax::no_limit and len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, + "excessive array size: " + std::to_string(len))); + } + + return true; + } + + bool end_array() override + { + const bool keep = 
callback(ref_stack.size() - 1, parse_event_t::array_end, *ref_stack.back()); + if (not keep) + { + *ref_stack.back() = discarded; + } + + ref_stack.pop_back(); + keep_stack.pop_back(); + return true; + } + + bool binary(const std::vector&) override + { + return true; + } + + bool parse_error(std::size_t, const std::string&, + const detail::exception& ex) override + { + errored = true; + if (allow_exceptions) + { + // determine the proper exception type from the id + switch ((ex.id / 100) % 100) + { + case 1: + JSON_THROW(*reinterpret_cast(&ex)); + case 2: + JSON_THROW(*reinterpret_cast(&ex)); // LCOV_EXCL_LINE + case 3: + JSON_THROW(*reinterpret_cast(&ex)); // LCOV_EXCL_LINE + case 4: + JSON_THROW(*reinterpret_cast(&ex)); + case 5: + JSON_THROW(*reinterpret_cast(&ex)); // LCOV_EXCL_LINE + default: + assert(false); // LCOV_EXCL_LINE + } + } + return false; + } + + bool is_errored() const + { + return errored; + } + + private: + /*! + @invariant If the ref stack is empty, then the passed value will be the new + root. 
+ @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + */ + template + BasicJsonType* handle_value(Value&& v) + { + if (ref_stack.empty()) + { + root = BasicJsonType(std::forward(v)); + return &root; + } + else + { + switch (ref_stack.back()->m_type) + { + case value_t::array: + { + ref_stack.back()->m_value.array->push_back(BasicJsonType(std::forward(v))); + return &(ref_stack.back()->m_value.array->back()); + } + + case value_t::object: + { + assert(object_element); + *object_element = BasicJsonType(std::forward(v)); + return object_element; + } + + default: + assert(false); // LCOV_EXCL_LINE + } + } + } + + /// the parsed JSON value + BasicJsonType& root; + /// stack to model hierarchy of values + std::vector ref_stack; + /// stack to manage which values to keep + std::vector keep_stack; + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; + /// whether a syntax error occurred + bool errored = false; + /// callback function + const parser_callback_t callback = nullptr; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; + /// a discarded value for the callback + BasicJsonType discarded = BasicJsonType::value_t::discarded; +}; + template class json_sax_acceptor : public json_sax { diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index 83bd75f1..c0710cf8 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -83,6 +83,27 @@ class parser { if (callback) { + /* + json_sax_dom_callback_parser sdp(result, callback, allow_exceptions); + sax_parse_internal(&sdp); + result.assert_invariant(); + + // in strict mode, input must be completely read + if (strict and (get_token() != token_type::end_of_input)) + { + sdp.parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), 
exception_message(token_type::end_of_input))); + } + + // in case of an error, return discarded value + if (sdp.is_errored()) + { + result = value_t::discarded; + return; + } + */ + parse_internal(true, result); result.assert_invariant(); diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index 02284597..bdc905f1 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -173,6 +173,8 @@ class basic_json friend class ::nlohmann::detail::binary_reader; template friend class ::nlohmann::detail::json_sax_dom_parser; + template + friend class ::nlohmann::detail::json_sax_dom_callback_parser; /// workaround type for MSVC using basic_json_t = NLOHMANN_BASIC_JSON_TPL; diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 50302903..f9d997b9 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3139,6 +3139,8 @@ scan_number_done: #include #include +// #include + // #include @@ -3446,6 +3448,221 @@ class json_sax_dom_parser : public json_sax const bool allow_exceptions = true; }; +template +class json_sax_dom_callback_parser : public json_sax +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using parser_callback_t = typename BasicJsonType::parser_callback_t; + using parse_event_t = typename BasicJsonType::parse_event_t; + + json_sax_dom_callback_parser(BasicJsonType& r, + const parser_callback_t cb = nullptr, + const bool allow_exceptions_ = true) + : root(r), callback(cb), allow_exceptions(allow_exceptions_) + { + keep_stack.push_back(true); + } + + bool null() override + { + handle_value(nullptr); + return true; + } + + bool boolean(bool val) override + { + handle_value(val); + return true; + } + + bool number_integer(number_integer_t val) override + { + 
handle_value(val); + return true; + } + + bool number_unsigned(number_unsigned_t val) override + { + handle_value(val); + return true; + } + + bool number_float(number_float_t val, const string_t&) override + { + handle_value(val); + return true; + } + + bool string(string_t&& val) override + { + handle_value(val); + return true; + } + + bool start_object(std::size_t len) override + { + const bool keep = callback(ref_stack.size() + 1, parse_event_t::object_start, discarded); + keep_stack.push_back(keep); + + ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); + + if (JSON_UNLIKELY(len != json_sax::no_limit and len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, + "excessive object size: " + std::to_string(len))); + } + + return true; + } + + bool key(string_t&& val) override + { + BasicJsonType k = BasicJsonType(std::forward < string_t&& > (val)); + const bool keep = callback(ref_stack.size(), parse_event_t::key, k); + + // add null at given key and store the reference for later + object_element = &(ref_stack.back()->m_value.object->operator[](val)); + return true; + } + + bool end_object() override + { + const bool keep = callback(ref_stack.size() - 1, parse_event_t::object_end, *ref_stack.back()); + if (not keep) + { + *ref_stack.back() = discarded; + } + + ref_stack.pop_back(); + keep_stack.pop_back(); + return true; + } + + bool start_array(std::size_t len) override + { + const bool keep = callback(ref_stack.size() + 1, parse_event_t::array_start, discarded); + keep_stack.push_back(keep); + + ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); + + if (JSON_UNLIKELY(len != json_sax::no_limit and len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, + "excessive array size: " + std::to_string(len))); + } + + return true; + } + + bool end_array() override + { + const bool keep = callback(ref_stack.size() - 1, parse_event_t::array_end, *ref_stack.back()); + if (not keep) + { + 
*ref_stack.back() = discarded; + } + + ref_stack.pop_back(); + keep_stack.pop_back(); + return true; + } + + bool binary(const std::vector&) override + { + return true; + } + + bool parse_error(std::size_t, const std::string&, + const detail::exception& ex) override + { + errored = true; + if (allow_exceptions) + { + // determine the proper exception type from the id + switch ((ex.id / 100) % 100) + { + case 1: + JSON_THROW(*reinterpret_cast(&ex)); + case 2: + JSON_THROW(*reinterpret_cast(&ex)); // LCOV_EXCL_LINE + case 3: + JSON_THROW(*reinterpret_cast(&ex)); // LCOV_EXCL_LINE + case 4: + JSON_THROW(*reinterpret_cast(&ex)); + case 5: + JSON_THROW(*reinterpret_cast(&ex)); // LCOV_EXCL_LINE + default: + assert(false); // LCOV_EXCL_LINE + } + } + return false; + } + + bool is_errored() const + { + return errored; + } + + private: + /*! + @invariant If the ref stack is empty, then the passed value will be the new + root. + @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + */ + template + BasicJsonType* handle_value(Value&& v) + { + if (ref_stack.empty()) + { + root = BasicJsonType(std::forward(v)); + return &root; + } + else + { + switch (ref_stack.back()->m_type) + { + case value_t::array: + { + ref_stack.back()->m_value.array->push_back(BasicJsonType(std::forward(v))); + return &(ref_stack.back()->m_value.array->back()); + } + + case value_t::object: + { + assert(object_element); + *object_element = BasicJsonType(std::forward(v)); + return object_element; + } + + default: + assert(false); // LCOV_EXCL_LINE + } + } + } + + /// the parsed JSON value + BasicJsonType& root; + /// stack to model hierarchy of values + std::vector ref_stack; + /// stack to manage which values to keep + std::vector keep_stack; + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; + /// whether a syntax error occurred + bool errored = false; + /// callback function + const 
parser_callback_t callback = nullptr; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; + /// a discarded value for the callback + BasicJsonType discarded = BasicJsonType::value_t::discarded; +}; + template class json_sax_acceptor : public json_sax { @@ -3599,6 +3816,27 @@ class parser { if (callback) { + /* + json_sax_dom_callback_parser sdp(result, callback, allow_exceptions); + sax_parse_internal(&sdp); + result.assert_invariant(); + + // in strict mode, input must be completely read + if (strict and (get_token() != token_type::end_of_input)) + { + sdp.parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input))); + } + + // in case of an error, return discarded value + if (sdp.is_errored()) + { + result = value_t::discarded; + return; + } + */ + parse_internal(true, result); result.assert_invariant(); @@ -10411,6 +10649,8 @@ class basic_json friend class ::nlohmann::detail::binary_reader; template friend class ::nlohmann::detail::json_sax_dom_parser; + template + friend class ::nlohmann::detail::json_sax_dom_callback_parser; /// workaround type for MSVC using basic_json_t = NLOHMANN_BASIC_JSON_TPL; From 943d641054c36ff68b7eb057d5c0e1b8e7409ad7 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 18 Mar 2018 16:07:21 +0100 Subject: [PATCH 25/43] :hammer: some refactoring --- include/nlohmann/detail/input/json_sax.hpp | 54 ++++++-------- include/nlohmann/detail/input/parser.hpp | 30 ++++---- single_include/nlohmann/json.hpp | 84 ++++++++++------------ 3 files changed, 74 insertions(+), 94 deletions(-) diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index e19088a9..5763e652 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -278,23 +278,17 @@ class json_sax_dom_parser : public json_sax } else { - switch 
(ref_stack.back()->m_type) + assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); + if (ref_stack.back()->is_array()) { - case value_t::array: - { - ref_stack.back()->m_value.array->push_back(BasicJsonType(std::forward(v))); - return &(ref_stack.back()->m_value.array->back()); - } - - case value_t::object: - { - assert(object_element); - *object_element = BasicJsonType(std::forward(v)); - return object_element; - } - - default: - assert(false); // LCOV_EXCL_LINE + ref_stack.back()->m_value.array->emplace_back(std::forward(v)); + return &(ref_stack.back()->m_value.array->back()); + } + else + { + assert(object_element); + *object_element = BasicJsonType(std::forward(v)); + return object_element; } } } @@ -397,6 +391,7 @@ class json_sax_dom_callback_parser : public json_sax const bool keep = callback(ref_stack.size() - 1, parse_event_t::object_end, *ref_stack.back()); if (not keep) { + // discard object *ref_stack.back() = discarded; } @@ -426,6 +421,7 @@ class json_sax_dom_callback_parser : public json_sax const bool keep = callback(ref_stack.size() - 1, parse_event_t::array_end, *ref_stack.back()); if (not keep) { + // discard array *ref_stack.back() = discarded; } @@ -487,23 +483,17 @@ class json_sax_dom_callback_parser : public json_sax } else { - switch (ref_stack.back()->m_type) + assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); + if (ref_stack.back()->is_array()) { - case value_t::array: - { - ref_stack.back()->m_value.array->push_back(BasicJsonType(std::forward(v))); - return &(ref_stack.back()->m_value.array->back()); - } - - case value_t::object: - { - assert(object_element); - *object_element = BasicJsonType(std::forward(v)); - return object_element; - } - - default: - assert(false); // LCOV_EXCL_LINE + ref_stack.back()->m_value.array->emplace_back(std::forward(v)); + return &(ref_stack.back()->m_value.array->back()); + } + else + { + assert(object_element); + *object_element = BasicJsonType(std::forward(v)); + 
return object_element; } } } diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index c0710cf8..5d389dc6 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -468,7 +468,7 @@ class parser { case token_type::begin_object: { - if (not sax->start_object()) + if (JSON_UNLIKELY(not sax->start_object())) { return false; } @@ -479,7 +479,7 @@ class parser // closing } -> we are done if (last_token == token_type::end_object) { - if (not sax->end_object()) + if (JSON_UNLIKELY(not sax->end_object())) { return false; } @@ -495,7 +495,7 @@ class parser } else { - if (not sax->key(m_lexer.move_string())) + if (JSON_UNLIKELY(not sax->key(m_lexer.move_string()))) { return false; } @@ -520,7 +520,7 @@ class parser case token_type::begin_array: { - if (not sax->start_array()) + if (JSON_UNLIKELY(not sax->start_array())) { return false; } @@ -531,7 +531,7 @@ class parser // closing ] -> we are done if (last_token == token_type::end_array) { - if (not sax->end_array()) + if (JSON_UNLIKELY(not sax->end_array())) { return false; } @@ -557,7 +557,7 @@ class parser } else { - if (not sax->number_float(res, m_lexer.move_string())) + if (JSON_UNLIKELY(not sax->number_float(res, m_lexer.move_string()))) { return false; } @@ -567,7 +567,7 @@ class parser case token_type::literal_false: { - if (not sax->boolean(false)) + if (JSON_UNLIKELY(not sax->boolean(false))) { return false; } @@ -576,7 +576,7 @@ class parser case token_type::literal_null: { - if (not sax->null()) + if (JSON_UNLIKELY(not sax->null())) { return false; } @@ -585,7 +585,7 @@ class parser case token_type::literal_true: { - if (not sax->boolean(true)) + if (JSON_UNLIKELY(not sax->boolean(true))) { return false; } @@ -594,7 +594,7 @@ class parser case token_type::value_integer: { - if (not sax->number_integer(m_lexer.get_number_integer())) + if (JSON_UNLIKELY(not sax->number_integer(m_lexer.get_number_integer()))) { return false; } 
@@ -603,7 +603,7 @@ class parser case token_type::value_string: { - if (not sax->string(m_lexer.move_string())) + if (JSON_UNLIKELY(not sax->string(m_lexer.move_string()))) { return false; } @@ -612,7 +612,7 @@ class parser case token_type::value_unsigned: { - if (not sax->number_unsigned(m_lexer.get_number_unsigned())) + if (JSON_UNLIKELY(not sax->number_unsigned(m_lexer.get_number_unsigned()))) { return false; } @@ -664,7 +664,7 @@ class parser // closing ] if (JSON_LIKELY(last_token == token_type::end_array)) { - if (not sax->end_array()) + if (JSON_UNLIKELY(not sax->end_array())) { return false; } @@ -703,7 +703,7 @@ class parser } else { - if (not sax->key(m_lexer.move_string())) + if (JSON_UNLIKELY(not sax->key(m_lexer.move_string()))) { return false; } @@ -726,7 +726,7 @@ class parser // closing } if (JSON_LIKELY(last_token == token_type::end_object)) { - if (not sax->end_object()) + if (JSON_UNLIKELY(not sax->end_object())) { return false; } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index f9d997b9..a060c7fb 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3415,23 +3415,17 @@ class json_sax_dom_parser : public json_sax } else { - switch (ref_stack.back()->m_type) + assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); + if (ref_stack.back()->is_array()) { - case value_t::array: - { - ref_stack.back()->m_value.array->push_back(BasicJsonType(std::forward(v))); - return &(ref_stack.back()->m_value.array->back()); - } - - case value_t::object: - { - assert(object_element); - *object_element = BasicJsonType(std::forward(v)); - return object_element; - } - - default: - assert(false); // LCOV_EXCL_LINE + ref_stack.back()->m_value.array->emplace_back(std::forward(v)); + return &(ref_stack.back()->m_value.array->back()); + } + else + { + assert(object_element); + *object_element = BasicJsonType(std::forward(v)); + return object_element; } } } @@ -3534,6 +3528,7 @@ class 
json_sax_dom_callback_parser : public json_sax const bool keep = callback(ref_stack.size() - 1, parse_event_t::object_end, *ref_stack.back()); if (not keep) { + // discard object *ref_stack.back() = discarded; } @@ -3563,6 +3558,7 @@ class json_sax_dom_callback_parser : public json_sax const bool keep = callback(ref_stack.size() - 1, parse_event_t::array_end, *ref_stack.back()); if (not keep) { + // discard array *ref_stack.back() = discarded; } @@ -3624,23 +3620,17 @@ class json_sax_dom_callback_parser : public json_sax } else { - switch (ref_stack.back()->m_type) + assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); + if (ref_stack.back()->is_array()) { - case value_t::array: - { - ref_stack.back()->m_value.array->push_back(BasicJsonType(std::forward(v))); - return &(ref_stack.back()->m_value.array->back()); - } - - case value_t::object: - { - assert(object_element); - *object_element = BasicJsonType(std::forward(v)); - return object_element; - } - - default: - assert(false); // LCOV_EXCL_LINE + ref_stack.back()->m_value.array->emplace_back(std::forward(v)); + return &(ref_stack.back()->m_value.array->back()); + } + else + { + assert(object_element); + *object_element = BasicJsonType(std::forward(v)); + return object_element; } } } @@ -4201,7 +4191,7 @@ class parser { case token_type::begin_object: { - if (not sax->start_object()) + if (JSON_UNLIKELY(not sax->start_object())) { return false; } @@ -4212,7 +4202,7 @@ class parser // closing } -> we are done if (last_token == token_type::end_object) { - if (not sax->end_object()) + if (JSON_UNLIKELY(not sax->end_object())) { return false; } @@ -4228,7 +4218,7 @@ class parser } else { - if (not sax->key(m_lexer.move_string())) + if (JSON_UNLIKELY(not sax->key(m_lexer.move_string()))) { return false; } @@ -4253,7 +4243,7 @@ class parser case token_type::begin_array: { - if (not sax->start_array()) + if (JSON_UNLIKELY(not sax->start_array())) { return false; } @@ -4264,7 +4254,7 @@ class parser // 
closing ] -> we are done if (last_token == token_type::end_array) { - if (not sax->end_array()) + if (JSON_UNLIKELY(not sax->end_array())) { return false; } @@ -4290,7 +4280,7 @@ class parser } else { - if (not sax->number_float(res, m_lexer.move_string())) + if (JSON_UNLIKELY(not sax->number_float(res, m_lexer.move_string()))) { return false; } @@ -4300,7 +4290,7 @@ class parser case token_type::literal_false: { - if (not sax->boolean(false)) + if (JSON_UNLIKELY(not sax->boolean(false))) { return false; } @@ -4309,7 +4299,7 @@ class parser case token_type::literal_null: { - if (not sax->null()) + if (JSON_UNLIKELY(not sax->null())) { return false; } @@ -4318,7 +4308,7 @@ class parser case token_type::literal_true: { - if (not sax->boolean(true)) + if (JSON_UNLIKELY(not sax->boolean(true))) { return false; } @@ -4327,7 +4317,7 @@ class parser case token_type::value_integer: { - if (not sax->number_integer(m_lexer.get_number_integer())) + if (JSON_UNLIKELY(not sax->number_integer(m_lexer.get_number_integer()))) { return false; } @@ -4336,7 +4326,7 @@ class parser case token_type::value_string: { - if (not sax->string(m_lexer.move_string())) + if (JSON_UNLIKELY(not sax->string(m_lexer.move_string()))) { return false; } @@ -4345,7 +4335,7 @@ class parser case token_type::value_unsigned: { - if (not sax->number_unsigned(m_lexer.get_number_unsigned())) + if (JSON_UNLIKELY(not sax->number_unsigned(m_lexer.get_number_unsigned()))) { return false; } @@ -4397,7 +4387,7 @@ class parser // closing ] if (JSON_LIKELY(last_token == token_type::end_array)) { - if (not sax->end_array()) + if (JSON_UNLIKELY(not sax->end_array())) { return false; } @@ -4436,7 +4426,7 @@ class parser } else { - if (not sax->key(m_lexer.move_string())) + if (JSON_UNLIKELY(not sax->key(m_lexer.move_string()))) { return false; } @@ -4459,7 +4449,7 @@ class parser // closing } if (JSON_LIKELY(last_token == token_type::end_object)) { - if (not sax->end_object()) + if (JSON_UNLIKELY(not sax->end_object())) 
{ return false; } From a271ee5f1664f9c398c4d2700f7cb0a8186ceb9a Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 18 Mar 2018 23:00:45 +0100 Subject: [PATCH 26/43] :recycle: proper use of SAX parser for binary formats --- .../nlohmann/detail/input/binary_reader.hpp | 729 +++++++++++++----- single_include/nlohmann/json.hpp | 729 +++++++++++++----- 2 files changed, 1044 insertions(+), 414 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index eef3ad7c..f5e1cc34 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -30,7 +30,7 @@ namespace detail /////////////////// /*! -@brief deserialization of CBOR and MessagePack values +@brief deserialization of CBOR, MessagePack, and UBJSON values */ template class binary_reader @@ -65,7 +65,8 @@ class binary_reader { BasicJsonType result; json_sax_dom_parser sdp(result); - parse_cbor_sax_internal(&sdp); + sax = &sdp; + parse_cbor_internal(); result.assert_invariant(); if (strict) { @@ -89,7 +90,8 @@ class binary_reader { BasicJsonType result; json_sax_dom_parser sdp(result); - parse_msgpack_sax_internal(&sdp); + sax = &sdp; + parse_msgpack_internal(); result.assert_invariant(); if (strict) { @@ -113,7 +115,8 @@ class binary_reader { BasicJsonType result; json_sax_dom_parser sdp(result); - parse_ubjson_sax_internal(&sdp); + sax = &sdp; + parse_ubjson_internal(); result.assert_invariant(); if (strict) { @@ -140,14 +143,16 @@ class binary_reader @param[in] get_char whether a new character should be retrieved from the input (true, default) or whether the last read character should be considered instead + + @return whether a valid CBOR value was passed to the SAX parser */ - bool parse_cbor_sax_internal(json_sax_t* sax, const bool get_char = true) + bool parse_cbor_internal(const bool get_char = true) { switch (get_char ? 
get() : current) { // EOF case std::char_traits::eof(): - return sax->parse_error(chars_read, "", parse_error::create(110, chars_read, "unexpected end of input")); + return unexpect_eof(); // Integer 0x00..0x17 (0..23) case 0x00: @@ -177,16 +182,28 @@ class binary_reader return sax->number_unsigned(static_cast(current)); case 0x18: // Unsigned integer (one-byte uint8_t follows) - return sax->number_unsigned(get_number()); + { + uint8_t number; + return get_number(number) and sax->number_unsigned(number); + } case 0x19: // Unsigned integer (two-byte uint16_t follows) - return sax->number_unsigned(get_number()); + { + uint16_t number; + return get_number(number) and sax->number_unsigned(number); + } case 0x1A: // Unsigned integer (four-byte uint32_t follows) - return sax->number_unsigned(get_number()); + { + uint32_t number; + return get_number(number) and sax->number_unsigned(number); + } case 0x1B: // Unsigned integer (eight-byte uint64_t follows) - return sax->number_unsigned(get_number()); + { + uint64_t number; + return get_number(number) and sax->number_unsigned(number); + } // Negative integer -1-0x00..-1-0x17 (-1..-24) case 0x20: @@ -216,17 +233,29 @@ class binary_reader return sax->number_integer(static_cast(0x20 - 1 - current)); case 0x38: // Negative integer (one-byte uint8_t follows) - return sax->number_integer(static_cast(-1) - get_number()); + { + uint8_t number; + return get_number(number) and sax->number_integer(static_cast(-1) - number); + } case 0x39: // Negative integer -1-n (two-byte uint16_t follows) - return sax->number_integer(static_cast(-1) - get_number()); + { + uint16_t number; + return get_number(number) and sax->number_integer(static_cast(-1) - number); + } case 0x3A: // Negative integer -1-n (four-byte uint32_t follows) - return sax->number_integer(static_cast(-1) - get_number()); + { + uint32_t number; + return get_number(number) and sax->number_integer(static_cast(-1) - number); + } case 0x3B: // Negative integer -1-n (eight-byte 
uint64_t follows) - return sax->number_integer(static_cast(-1) - - static_cast(get_number())); + { + uint64_t number; + return get_number(number) and sax->number_integer(static_cast(-1) + - static_cast(number)); + } // UTF-8 string (0x00..0x17 bytes follow) case 0x60: @@ -258,7 +287,10 @@ class binary_reader case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) case 0x7F: // UTF-8 string (indefinite length) - return sax->string(get_cbor_string()); + { + string_t s; + return get_cbor_string(s) and sax->string(std::move(s)); + } // array (0x00..0x17 data items follow) case 0x80: @@ -285,22 +317,34 @@ class binary_reader case 0x95: case 0x96: case 0x97: - return get_cbor_array_sax(sax, static_cast(current & 0x1F)); + return get_cbor_array(static_cast(current & 0x1F)); case 0x98: // array (one-byte uint8_t for n follows) - return get_cbor_array_sax(sax, static_cast(get_number())); + { + uint8_t len; + return get_number(len) and get_cbor_array(static_cast(len)); + } case 0x99: // array (two-byte uint16_t for n follow) - return get_cbor_array_sax(sax, static_cast(get_number())); + { + uint16_t len; + return get_number(len) and get_cbor_array(static_cast(len)); + } case 0x9A: // array (four-byte uint32_t for n follow) - return get_cbor_array_sax(sax, static_cast(get_number())); + { + uint32_t len; + return get_number(len) and get_cbor_array(static_cast(len)); + } case 0x9B: // array (eight-byte uint64_t for n follow) - return get_cbor_array_sax(sax, static_cast(get_number())); + { + uint64_t len; + return get_number(len) and get_cbor_array(static_cast(len)); + } case 0x9F: // array (indefinite length) - return get_cbor_array_sax(sax, json_sax_t::no_limit); + return get_cbor_array(json_sax_t::no_limit); // map (0x00..0x17 pairs of data items follow) case 0xA0: @@ -327,22 +371,34 @@ class binary_reader case 0xB5: case 0xB6: case 0xB7: - return get_cbor_object_sax(sax, static_cast(current & 0x1F)); + return 
get_cbor_object(static_cast(current & 0x1F)); case 0xB8: // map (one-byte uint8_t for n follows) - return get_cbor_object_sax(sax, static_cast(get_number())); + { + uint8_t len; + return get_number(len) and get_cbor_object(static_cast(len)); + } case 0xB9: // map (two-byte uint16_t for n follow) - return get_cbor_object_sax(sax, static_cast(get_number())); + { + uint16_t len; + return get_number(len) and get_cbor_object(static_cast(len)); + } case 0xBA: // map (four-byte uint32_t for n follow) - return get_cbor_object_sax(sax, static_cast(get_number())); + { + uint32_t len; + return get_number(len) and get_cbor_object(static_cast(len)); + } case 0xBB: // map (eight-byte uint64_t for n follow) - return get_cbor_object_sax(sax, static_cast(get_number())); + { + uint64_t len; + return get_number(len) and get_cbor_object(static_cast(len)); + } case 0xBF: // map (indefinite length) - return get_cbor_object_sax(sax, json_sax_t::no_limit); + return get_cbor_object(json_sax_t::no_limit); case 0xF4: // false return sax->boolean(false); @@ -356,9 +412,15 @@ class binary_reader case 0xF9: // Half-Precision Float (two-byte IEEE 754) { const int byte1 = get(); - unexpect_eof(); + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } const int byte2 = get(); - unexpect_eof(); + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } // code from RFC 7049, Appendix D, Figure 3: // As half-precision floating-point numbers were only added @@ -389,27 +451,35 @@ class binary_reader } case 0xFA: // Single-Precision Float (four-byte IEEE 754) - return sax->number_float(static_cast(get_number()), ""); + { + float number; + return get_number(number) and sax->number_float(static_cast(number), ""); + } case 0xFB: // Double-Precision Float (eight-byte IEEE 754) - return sax->number_float(get_number(), ""); + { + double number; + return get_number(number) and sax->number_float(number, ""); + } default: // anything else (0xFF is handled inside the other types) { - 
std::stringstream ss; - ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - return sax->parse_error(chars_read, ss.str(), parse_error::create(112, chars_read, "error reading CBOR; last byte: 0x" + ss.str())); + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, "error reading CBOR; last byte: 0x" + last_token)); } } } - bool parse_msgpack_sax_internal(json_sax_t* sax) + /*! + @return whether a valid MessagePack value was passed to the SAX parser + */ + bool parse_msgpack_internal() { switch (get()) { // EOF case std::char_traits::eof(): - return sax->parse_error(chars_read, "", parse_error::create(110, chars_read, "unexpected end of input")); + return unexpect_eof(); // positive fixint case 0x00: @@ -559,7 +629,7 @@ class binary_reader case 0x8D: case 0x8E: case 0x8F: - return get_msgpack_object_sax(sax, static_cast(current & 0x0F)); + return get_msgpack_object(static_cast(current & 0x0F)); // fixarray case 0x90: @@ -578,7 +648,7 @@ class binary_reader case 0x9D: case 0x9E: case 0x9F: - return get_msgpack_array_sax(sax, static_cast(current & 0x0F)); + return get_msgpack_array(static_cast(current & 0x0F)); // fixstr case 0xA0: @@ -613,7 +683,10 @@ class binary_reader case 0xBD: case 0xBE: case 0xBF: - return sax->string(get_msgpack_string()); + { + string_t s; + return get_msgpack_string(s) and sax->string(std::move(s)); + } case 0xC0: // nil return sax->null(); @@ -625,51 +698,96 @@ class binary_reader return sax->boolean(true); case 0xCA: // float 32 - return sax->number_float(static_cast(get_number()), ""); + { + float number; + return get_number(number) and sax->number_float(static_cast(number), ""); + } case 0xCB: // float 64 - return sax->number_float(get_number(), ""); + { + double number; + return get_number(number) and sax->number_float(number, ""); + } case 0xCC: // uint 8 - return sax->number_unsigned(get_number()); + { + uint8_t number; + return 
get_number(number) and sax->number_unsigned(number); + } case 0xCD: // uint 16 - return sax->number_unsigned(get_number()); + { + uint16_t number; + return get_number(number) and sax->number_unsigned(number); + } case 0xCE: // uint 32 - return sax->number_unsigned(get_number()); + { + uint32_t number; + return get_number(number) and sax->number_unsigned(number); + } case 0xCF: // uint 64 - return sax->number_unsigned(get_number()); + { + uint64_t number; + return get_number(number) and sax->number_unsigned(number); + } case 0xD0: // int 8 - return sax->number_integer(get_number()); + { + int8_t number; + return get_number(number) and sax->number_integer(number); + } case 0xD1: // int 16 - return sax->number_integer(get_number()); + { + int16_t number; + return get_number(number) and sax->number_integer(number); + } case 0xD2: // int 32 - return sax->number_integer(get_number()); + { + int32_t number; + return get_number(number) and sax->number_integer(number); + } case 0xD3: // int 64 - return sax->number_integer(get_number()); + { + int64_t number; + return get_number(number) and sax->number_integer(number); + } case 0xD9: // str 8 case 0xDA: // str 16 case 0xDB: // str 32 - return sax->string(get_msgpack_string()); + { + string_t s; + return get_msgpack_string(s) and sax->string(std::move(s)); + } case 0xDC: // array 16 - return get_msgpack_array_sax(sax, static_cast(get_number())); + { + uint16_t len; + return get_number(len) and get_msgpack_array(static_cast(len)); + } case 0xDD: // array 32 - return get_msgpack_array_sax(sax, static_cast(get_number())); + { + uint32_t len; + return get_number(len) and get_msgpack_array(static_cast(len)); + } case 0xDE: // map 16 - return get_msgpack_object_sax(sax, static_cast(get_number())); + { + uint16_t len; + return get_number(len) and get_msgpack_object(static_cast(len)); + } case 0xDF: // map 32 - return get_msgpack_object_sax(sax, static_cast(get_number())); + { + uint32_t len; + return get_number(len) and 
get_msgpack_object(static_cast(len)); + } // negative fixint case 0xE0: @@ -708,9 +826,8 @@ class binary_reader default: // anything else { - std::stringstream ss; - ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - return sax->parse_error(chars_read, ss.str(), parse_error::create(112, chars_read, "error reading MessagePack; last byte: 0x" + ss.str())); + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, "error reading MessagePack; last byte: 0x" + last_token)); } } } @@ -719,10 +836,12 @@ class binary_reader @param[in] get_char whether a new character should be retrieved from the input (true, default) or whether the last read character should be considered instead + + @return whether a valid UBJSON value was passed to the SAX parser */ - bool parse_ubjson_sax_internal(json_sax_t* sax, const bool get_char = true) + bool parse_ubjson_internal(const bool get_char = true) { - return get_ubjson_sax_value(sax, get_char ? get_ignore_noop() : current); + return get_ubjson_value(get_char ? get_ignore_noop() : current); } /*! @@ -758,23 +877,26 @@ class binary_reader @brief read a number from the input @tparam NumberType the type of the number + @param[out] result number of type @a NumberType - @return number of type @a NumberType + @return whether conversion completed @note This function needs to respect the system's endianess, because - bytes in CBOR and MessagePack are stored in network order (big - endian) and therefore need reordering on little endian systems. - - @throw parse_error.110 if input has less than `sizeof(NumberType)` bytes + bytes in CBOR, MessagePack, and UBJSON are stored in network order + (big endian) and therefore need reordering on little endian systems. 
*/ - template NumberType get_number() + template + bool get_number(NumberType& result) { // step 1: read input into array with system's byte order std::array vec; for (std::size_t i = 0; i < sizeof(NumberType); ++i) { get(); - unexpect_eof(); + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } // reverse byte order prior to conversion if necessary if (is_little_endian) @@ -788,35 +910,37 @@ class binary_reader } // step 2: convert array into number of type T and return - NumberType result; std::memcpy(&result, vec.data(), sizeof(NumberType)); - return result; + return true; } /*! @brief create a string by reading characters from the input - @param[in] len number of bytes to read + @tparam NumberType the type of the number + @param[in] len number of characters to read + @param[out] string created by reading @a len bytes + + @return whether string creation completed @note We can not reserve @a len bytes for the result, because @a len may be too large. Usually, @ref unexpect_eof() detects the end of the input before we run out of string memory. - - @return string created by reading @a len bytes - - @throw parse_error.110 if input has less than @a len bytes */ template - string_t get_string(const NumberType len) + bool get_string(const NumberType len, string_t& result) { - string_t result; - std::generate_n(std::back_inserter(result), len, [this]() + bool success = true; + std::generate_n(std::back_inserter(result), len, [this, &success]() { get(); - unexpect_eof(); + if (JSON_UNLIKELY(not unexpect_eof())) + { + success = false; + } return static_cast(current); }); - return result; + return success; } /*! @@ -826,14 +950,16 @@ class binary_reader string length and then copies this number of bytes into a string. Additionally, CBOR's strings with indefinite lengths are supported. 
- @return string + @param[out] result created string - @throw parse_error.110 if input ended - @throw parse_error.113 if an unexpected byte is read + @return whether string creation completed */ - string_t get_cbor_string() + bool get_cbor_string(string_t& result) { - unexpect_eof(); + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } switch (current) { @@ -862,42 +988,64 @@ class binary_reader case 0x75: case 0x76: case 0x77: - return get_string(current & 0x1F); + { + return get_string(current & 0x1F, result); + } case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - return get_string(get_number()); + { + uint8_t len; + return get_number(len) and get_string(len, result); + } case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - return get_string(get_number()); + { + uint16_t len; + return get_number(len) and get_string(len, result); + } case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) - return get_string(get_number()); + { + uint32_t len; + return get_number(len) and get_string(len, result); + } case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) - return get_string(get_number()); + { + uint64_t len; + return get_number(len) and get_string(len, result); + } case 0x7F: // UTF-8 string (indefinite length) { - string_t result; while (get() != 0xFF) { - result.append(get_cbor_string()); + string_t chunk; + if (not get_cbor_string(chunk)) + { + return false; + } + result.append(chunk); } - return result; + return true; } default: { - std::stringstream ss; - ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(113, chars_read, "expected a CBOR string; last byte: 0x" + ss.str())); + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "expected a CBOR string; last byte: 0x" + last_token)); } } } - bool get_cbor_array_sax(json_sax_t* sax, const std::size_t len) + /*! 
+ @param[in] len the length of the array or json_sax_t::no_limit for an + array of indefinite size + @return whether array creation completed + */ + bool get_cbor_array(const std::size_t len) { - if (not sax->start_array(len)) + if (JSON_UNLIKELY(not sax->start_array(len))) { return false; } @@ -905,7 +1053,7 @@ class binary_reader if (len != json_sax_t::no_limit) for (std::size_t i = 0; i < len; ++i) { - if (not parse_cbor_sax_internal(sax)) + if (JSON_UNLIKELY(not parse_cbor_internal())) { return false; } @@ -914,7 +1062,7 @@ class binary_reader { while (get() != 0xFF) { - if (not parse_cbor_sax_internal(sax, false)) + if (JSON_UNLIKELY(not parse_cbor_internal(false))) { return false; } @@ -924,9 +1072,14 @@ class binary_reader return sax->end_array(); } - bool get_cbor_object_sax(json_sax_t* sax, const std::size_t len) + /*! + @param[in] len the length of the object or json_sax_t::no_limit for an + object of indefinite size + @return whether object creation completed + */ + bool get_cbor_object(const std::size_t len) { - if (not sax->start_object(len)) + if (not JSON_UNLIKELY(sax->start_object(len))) { return false; } @@ -936,12 +1089,13 @@ class binary_reader for (std::size_t i = 0; i < len; ++i) { get(); - if (not sax->key(get_cbor_string())) + string_t key; + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(std::move(key)))) { return false; } - if (not parse_cbor_sax_internal(sax)) + if (JSON_UNLIKELY(not parse_cbor_internal())) { return false; } @@ -951,12 +1105,13 @@ class binary_reader { while (get() != 0xFF) { - if (not sax->key(get_cbor_string())) + string_t key; + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(std::move(key)))) { return false; } - if (not parse_cbor_sax_internal(sax)) + if (JSON_UNLIKELY(not parse_cbor_internal())) { return false; } @@ -972,14 +1127,16 @@ class binary_reader This function first reads starting bytes to determine the expected string length and then copies this number of bytes into a string. 
- @return string + @param[out] result created string - @throw parse_error.110 if input ended - @throw parse_error.113 if an unexpected byte is read + @return whether string creation completed */ - string_t get_msgpack_string() + bool get_msgpack_string(string_t& result) { - unexpect_eof(); + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } switch (current) { @@ -1016,37 +1173,50 @@ class binary_reader case 0xBD: case 0xBE: case 0xBF: - return get_string(current & 0x1F); + { + return get_string(current & 0x1F, result); + } case 0xD9: // str 8 - return get_string(get_number()); + { + uint8_t len; + return get_number(len) and get_string(len, result); + } case 0xDA: // str 16 - return get_string(get_number()); + { + uint16_t len; + return get_number(len) and get_string(len, result); + } case 0xDB: // str 32 - return get_string(get_number()); + { + uint32_t len; + return get_number(len) and get_string(len, result); + } default: { - std::stringstream ss; - ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(113, chars_read, - "expected a MessagePack string; last byte: 0x" + ss.str())); + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "expected a MessagePack string; last byte: 0x" + last_token)); } } } - bool get_msgpack_array_sax(json_sax_t* sax, const std::size_t len) + /*! + @param[in] len the length of the array + @return whether array creation completed + */ + bool get_msgpack_array(const std::size_t len) { - if (not sax->start_array(len)) + if (JSON_UNLIKELY(not sax->start_array(len))) { return false; } for (std::size_t i = 0; i < len; ++i) { - if (not parse_msgpack_sax_internal(sax)) + if (JSON_UNLIKELY(not parse_msgpack_internal())) { return false; } @@ -1055,9 +1225,13 @@ class binary_reader return sax->end_array(); } - bool get_msgpack_object_sax(json_sax_t* sax, const std::size_t len) + /*! 
+ @param[in] len the length of the object + @return whether object creation completed + */ + bool get_msgpack_object(const std::size_t len) { - if (not sax->start_object(len)) + if (JSON_UNLIKELY(not sax->start_object(len))) { return false; } @@ -1065,12 +1239,13 @@ class binary_reader for (std::size_t i = 0; i < len; ++i) { get(); - if (not sax->key(get_msgpack_string())) + string_t key; + if (JSON_UNLIKELY(not get_msgpack_string(key) or not sax->key(std::move(key)))) { return false; } - if (not parse_msgpack_sax_internal(sax)) + if (JSON_UNLIKELY(not parse_msgpack_internal())) { return false; } @@ -1086,60 +1261,131 @@ class binary_reader indicating a string, or in case of an object key where the 'S' byte can be left out. + @param[out] result created string @param[in] get_char whether a new character should be retrieved from the input (true, default) or whether the last read character should be considered instead - @return string - - @throw parse_error.110 if input ended - @throw parse_error.113 if an unexpected byte is read + @return whether string creation completed */ - string_t get_ubjson_string(const bool get_char = true) + bool get_ubjson_string(string_t& result, const bool get_char = true) { if (get_char) { get(); // TODO: may we ignore N here? 
} - unexpect_eof(); + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } switch (current) { case 'U': - return get_string(get_number()); + { + uint8_t len; + return get_number(len) and get_string(len, result); + } + case 'i': - return get_string(get_number()); + { + int8_t len; + return get_number(len) and get_string(len, result); + } + case 'I': - return get_string(get_number()); + { + int16_t len; + return get_number(len) and get_string(len, result); + } + case 'l': - return get_string(get_number()); + { + int32_t len; + return get_number(len) and get_string(len, result); + } + case 'L': - return get_string(get_number()); + { + int64_t len; + return get_number(len) and get_string(len, result); + } + default: - std::stringstream ss; - ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(113, chars_read, - "expected a UBJSON string; last byte: 0x" + ss.str())); + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "expected a UBJSON string; last byte: 0x" + last_token)); } } - std::size_t get_ubjson_size_value() + /*! 
+ @param[out] result determined size + @return whether size determination completed + */ + bool get_ubjson_size_value(std::size_t& result) { switch (get_ignore_noop()) { case 'U': - return static_cast(get_number()); + { + uint8_t number; + if (JSON_UNLIKELY(not get_number(number))) + { + return false; + } + result = static_cast(number); + return true; + } + case 'i': - return static_cast(get_number()); + { + int8_t number; + if (JSON_UNLIKELY(not get_number(number))) + { + return false; + } + result = static_cast(number); + return true; + } + case 'I': - return static_cast(get_number()); + { + int16_t number; + if (JSON_UNLIKELY(not get_number(number))) + { + return false; + } + result = static_cast(number); + return true; + } + case 'l': - return static_cast(get_number()); + { + int32_t number; + if (JSON_UNLIKELY(not get_number(number))) + { + return false; + } + result = static_cast(number); + return true; + } + case 'L': - return static_cast(get_number()); + { + int64_t number; + if (JSON_UNLIKELY(not get_number(number))) + { + return false; + } + result = static_cast(number); + return true; + } + default: - return std::size_t(-1); + { + result = std::size_t(-1); + return true; + } } } @@ -1149,44 +1395,51 @@ class binary_reader In the optimized UBJSON format, a type and a size can be provided to allow for a more compact representation. 
- @return pair of the size and the type + @param[out] result pair of the size and the type + + @return whether pair creation completed */ - std::pair get_ubjson_size_type() + bool get_ubjson_size_type(std::pair& result) { - std::size_t sz = string_t::npos; - int tc = 0; + result.first = string_t::npos; // size + result.second = 0; // type get_ignore_noop(); if (current == '$') { - tc = get(); // must not ignore 'N', because 'N' maybe the type - unexpect_eof(); + result.second = get(); // must not ignore 'N', because 'N' maybe the type + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } get_ignore_noop(); - if (current != '#') + if (JSON_UNLIKELY(current != '#')) { - std::stringstream ss; - ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(112, chars_read, - "expected '#' after UBJSON type information; last byte: 0x" + ss.str())); + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, "expected '#' after UBJSON type information; last byte: 0x" + last_token)); } - sz = get_ubjson_size_value(); + + return get_ubjson_size_value(result.first); } else if (current == '#') { - sz = get_ubjson_size_value(); + return get_ubjson_size_value(result.first); } - - return std::make_pair(sz, tc); + return true; } - bool get_ubjson_sax_value(json_sax_t* sax, const int prefix) + /*! 
+ @param prefix the previously read or set type prefix + @return whether value creation completed + */ + bool get_ubjson_value(const int prefix) { switch (prefix) { case std::char_traits::eof(): // EOF - return sax->parse_error(chars_read, "", parse_error::create(110, chars_read, "unexpected end of input")); + return unexpect_eof(); case 'T': // true return sax->boolean(true); @@ -1197,57 +1450,96 @@ class binary_reader return sax->null(); case 'U': - return sax->number_unsigned(get_number()); + { + uint8_t number; + return get_number(number) and sax->number_unsigned(number); + } + case 'i': - return sax->number_integer(get_number()); + { + int8_t number; + return get_number(number) and sax->number_integer(number); + } + case 'I': - return sax->number_integer(get_number()); + { + int16_t number; + return get_number(number) and sax->number_integer(number); + } + case 'l': - return sax->number_integer(get_number()); + { + int32_t number; + return get_number(number) and sax->number_integer(number); + } + case 'L': - return sax->number_integer(get_number()); + { + int64_t number; + return get_number(number) and sax->number_integer(number); + } + case 'd': - return sax->number_float(static_cast(get_number()), ""); + { + float number; + return get_number(number) and sax->number_float(static_cast(number), ""); + } + case 'D': - return sax->number_float(get_number(), ""); + { + double number; + return get_number(number) and sax->number_float(number, ""); + } case 'C': // char { get(); - unexpect_eof(); + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } if (JSON_UNLIKELY(current > 127)) { - std::stringstream ss; - ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - return sax->parse_error(chars_read, ss.str(), parse_error::create(113, chars_read, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + ss.str())); + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, 
parse_error::create(113, chars_read, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token)); } return sax->string(string_t(1, static_cast(current))); } case 'S': // string - return sax->string(get_ubjson_string()); + { + string_t s; + return get_ubjson_string(s) and sax->string(std::move(s)); + } case '[': // array - return get_ubjson_sax_array(sax); + return get_ubjson_array(); case '{': // object - return get_ubjson_sax_object(sax); + return get_ubjson_object(); default: // anything else - std::stringstream ss; - ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(112, chars_read, - "error reading UBJSON; last byte: 0x" + ss.str())); + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, "error reading UBJSON; last byte: 0x" + last_token)); + } } } - bool get_ubjson_sax_array(json_sax_t* sax) + /*! + @return whether array creation completed + */ + bool get_ubjson_array() { - const auto size_and_type = get_ubjson_size_type(); + std::pair size_and_type; + if (JSON_UNLIKELY(not get_ubjson_size_type(size_and_type))) + { + return false; + } if (size_and_type.first != string_t::npos) { - if (not sax->start_array(size_and_type.first)) + if (JSON_UNLIKELY(not sax->start_array(size_and_type.first))) { return false; } @@ -1258,7 +1550,7 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (not get_ubjson_sax_value(sax, size_and_type.second)) + if (JSON_UNLIKELY(not get_ubjson_value(size_and_type.second))) { return false; } @@ -1269,7 +1561,7 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (not parse_ubjson_sax_internal(sax)) + if (JSON_UNLIKELY(not parse_ubjson_internal())) { return false; } @@ -1278,14 +1570,14 @@ class binary_reader } else { - if (not sax->start_array()) + if (JSON_UNLIKELY(not sax->start_array())) { return false; } while 
(current != ']') { - if (not parse_ubjson_sax_internal(sax, false)) + if (JSON_UNLIKELY(not parse_ubjson_internal(false))) { return false; } @@ -1296,13 +1588,20 @@ class binary_reader return sax->end_array(); } - bool get_ubjson_sax_object(json_sax_t* sax) + /*! + @return whether object creation completed + */ + bool get_ubjson_object() { - const auto size_and_type = get_ubjson_size_type(); + std::pair size_and_type; + if (JSON_UNLIKELY(not get_ubjson_size_type(size_and_type))) + { + return false; + } if (size_and_type.first != string_t::npos) { - if (not sax->start_object(size_and_type.first)) + if (JSON_UNLIKELY(not sax->start_object(size_and_type.first))) { return false; } @@ -1311,11 +1610,12 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (not sax->key(get_ubjson_string())) + string_t key; + if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(std::move(key)))) { return false; } - if (not get_ubjson_sax_value(sax, size_and_type.second)) + if (JSON_UNLIKELY(not get_ubjson_value(size_and_type.second))) { return false; } @@ -1325,11 +1625,12 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (not sax->key(get_ubjson_string())) + string_t key; + if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(std::move(key)))) { return false; } - if (not parse_ubjson_sax_internal(sax)) + if (JSON_UNLIKELY(not parse_ubjson_internal())) { return false; } @@ -1338,18 +1639,19 @@ class binary_reader } else { - if (not sax->start_object()) + if (JSON_UNLIKELY(not sax->start_object())) { return false; } while (current != '}') { - if (not sax->key(get_ubjson_string(false))) + string_t key; + if (JSON_UNLIKELY(not get_ubjson_string(key, false) or not sax->key(std::move(key)))) { return false; } - if (not parse_ubjson_sax_internal(sax)) + if (JSON_UNLIKELY(not parse_ubjson_internal())) { return false; } @@ -1361,27 +1663,37 @@ class binary_reader } /*! 
- @brief throw if end of input is not reached - @throw parse_error.110 if input not ended + @return whether input was completely read */ - void expect_eof() const + bool expect_eof() const { if (JSON_UNLIKELY(current != std::char_traits::eof())) { - JSON_THROW(parse_error::create(110, chars_read, "expected end of input")); + return sax->parse_error(chars_read, get_token_string(), parse_error::create(110, chars_read, "expected end of input")); } + return true; } /*! - @briefthrow if end of input is reached - @throw parse_error.110 if input ended + @return whether the last read character is not EOF */ - void unexpect_eof() const + bool unexpect_eof() const { if (JSON_UNLIKELY(current == std::char_traits::eof())) { - JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + return sax->parse_error(chars_read, "", parse_error::create(110, chars_read, "unexpected end of input")); } + return true; + } + + /*! + @return a string representation of the last read byte + */ + std::string get_token_string() const + { + std::stringstream ss; + ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; + return ss.str(); } private: @@ -1396,6 +1708,9 @@ class binary_reader /// whether we can assume little endianess const bool is_little_endian = little_endianess(); + + /// the SAX parser + json_sax_t* sax = nullptr; }; } } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index a060c7fb..4d8ec2bc 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -5688,7 +5688,7 @@ namespace detail /////////////////// /*! 
-@brief deserialization of CBOR and MessagePack values +@brief deserialization of CBOR, MessagePack, and UBJSON values */ template class binary_reader @@ -5723,7 +5723,8 @@ class binary_reader { BasicJsonType result; json_sax_dom_parser sdp(result); - parse_cbor_sax_internal(&sdp); + sax = &sdp; + parse_cbor_internal(); result.assert_invariant(); if (strict) { @@ -5747,7 +5748,8 @@ class binary_reader { BasicJsonType result; json_sax_dom_parser sdp(result); - parse_msgpack_sax_internal(&sdp); + sax = &sdp; + parse_msgpack_internal(); result.assert_invariant(); if (strict) { @@ -5771,7 +5773,8 @@ class binary_reader { BasicJsonType result; json_sax_dom_parser sdp(result); - parse_ubjson_sax_internal(&sdp); + sax = &sdp; + parse_ubjson_internal(); result.assert_invariant(); if (strict) { @@ -5798,14 +5801,16 @@ class binary_reader @param[in] get_char whether a new character should be retrieved from the input (true, default) or whether the last read character should be considered instead + + @return whether a valid CBOR value was passed to the SAX parser */ - bool parse_cbor_sax_internal(json_sax_t* sax, const bool get_char = true) + bool parse_cbor_internal(const bool get_char = true) { switch (get_char ? 
get() : current) { // EOF case std::char_traits::eof(): - return sax->parse_error(chars_read, "", parse_error::create(110, chars_read, "unexpected end of input")); + return unexpect_eof(); // Integer 0x00..0x17 (0..23) case 0x00: @@ -5835,16 +5840,28 @@ class binary_reader return sax->number_unsigned(static_cast(current)); case 0x18: // Unsigned integer (one-byte uint8_t follows) - return sax->number_unsigned(get_number()); + { + uint8_t number; + return get_number(number) and sax->number_unsigned(number); + } case 0x19: // Unsigned integer (two-byte uint16_t follows) - return sax->number_unsigned(get_number()); + { + uint16_t number; + return get_number(number) and sax->number_unsigned(number); + } case 0x1A: // Unsigned integer (four-byte uint32_t follows) - return sax->number_unsigned(get_number()); + { + uint32_t number; + return get_number(number) and sax->number_unsigned(number); + } case 0x1B: // Unsigned integer (eight-byte uint64_t follows) - return sax->number_unsigned(get_number()); + { + uint64_t number; + return get_number(number) and sax->number_unsigned(number); + } // Negative integer -1-0x00..-1-0x17 (-1..-24) case 0x20: @@ -5874,17 +5891,29 @@ class binary_reader return sax->number_integer(static_cast(0x20 - 1 - current)); case 0x38: // Negative integer (one-byte uint8_t follows) - return sax->number_integer(static_cast(-1) - get_number()); + { + uint8_t number; + return get_number(number) and sax->number_integer(static_cast(-1) - number); + } case 0x39: // Negative integer -1-n (two-byte uint16_t follows) - return sax->number_integer(static_cast(-1) - get_number()); + { + uint16_t number; + return get_number(number) and sax->number_integer(static_cast(-1) - number); + } case 0x3A: // Negative integer -1-n (four-byte uint32_t follows) - return sax->number_integer(static_cast(-1) - get_number()); + { + uint32_t number; + return get_number(number) and sax->number_integer(static_cast(-1) - number); + } case 0x3B: // Negative integer -1-n (eight-byte 
uint64_t follows) - return sax->number_integer(static_cast(-1) - - static_cast(get_number())); + { + uint64_t number; + return get_number(number) and sax->number_integer(static_cast(-1) + - static_cast(number)); + } // UTF-8 string (0x00..0x17 bytes follow) case 0x60: @@ -5916,7 +5945,10 @@ class binary_reader case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) case 0x7F: // UTF-8 string (indefinite length) - return sax->string(get_cbor_string()); + { + string_t s; + return get_cbor_string(s) and sax->string(std::move(s)); + } // array (0x00..0x17 data items follow) case 0x80: @@ -5943,22 +5975,34 @@ class binary_reader case 0x95: case 0x96: case 0x97: - return get_cbor_array_sax(sax, static_cast(current & 0x1F)); + return get_cbor_array(static_cast(current & 0x1F)); case 0x98: // array (one-byte uint8_t for n follows) - return get_cbor_array_sax(sax, static_cast(get_number())); + { + uint8_t len; + return get_number(len) and get_cbor_array(static_cast(len)); + } case 0x99: // array (two-byte uint16_t for n follow) - return get_cbor_array_sax(sax, static_cast(get_number())); + { + uint16_t len; + return get_number(len) and get_cbor_array(static_cast(len)); + } case 0x9A: // array (four-byte uint32_t for n follow) - return get_cbor_array_sax(sax, static_cast(get_number())); + { + uint32_t len; + return get_number(len) and get_cbor_array(static_cast(len)); + } case 0x9B: // array (eight-byte uint64_t for n follow) - return get_cbor_array_sax(sax, static_cast(get_number())); + { + uint64_t len; + return get_number(len) and get_cbor_array(static_cast(len)); + } case 0x9F: // array (indefinite length) - return get_cbor_array_sax(sax, json_sax_t::no_limit); + return get_cbor_array(json_sax_t::no_limit); // map (0x00..0x17 pairs of data items follow) case 0xA0: @@ -5985,22 +6029,34 @@ class binary_reader case 0xB5: case 0xB6: case 0xB7: - return get_cbor_object_sax(sax, static_cast(current & 0x1F)); + 
return get_cbor_object(static_cast(current & 0x1F)); case 0xB8: // map (one-byte uint8_t for n follows) - return get_cbor_object_sax(sax, static_cast(get_number())); + { + uint8_t len; + return get_number(len) and get_cbor_object(static_cast(len)); + } case 0xB9: // map (two-byte uint16_t for n follow) - return get_cbor_object_sax(sax, static_cast(get_number())); + { + uint16_t len; + return get_number(len) and get_cbor_object(static_cast(len)); + } case 0xBA: // map (four-byte uint32_t for n follow) - return get_cbor_object_sax(sax, static_cast(get_number())); + { + uint32_t len; + return get_number(len) and get_cbor_object(static_cast(len)); + } case 0xBB: // map (eight-byte uint64_t for n follow) - return get_cbor_object_sax(sax, static_cast(get_number())); + { + uint64_t len; + return get_number(len) and get_cbor_object(static_cast(len)); + } case 0xBF: // map (indefinite length) - return get_cbor_object_sax(sax, json_sax_t::no_limit); + return get_cbor_object(json_sax_t::no_limit); case 0xF4: // false return sax->boolean(false); @@ -6014,9 +6070,15 @@ class binary_reader case 0xF9: // Half-Precision Float (two-byte IEEE 754) { const int byte1 = get(); - unexpect_eof(); + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } const int byte2 = get(); - unexpect_eof(); + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } // code from RFC 7049, Appendix D, Figure 3: // As half-precision floating-point numbers were only added @@ -6047,27 +6109,35 @@ class binary_reader } case 0xFA: // Single-Precision Float (four-byte IEEE 754) - return sax->number_float(static_cast(get_number()), ""); + { + float number; + return get_number(number) and sax->number_float(static_cast(number), ""); + } case 0xFB: // Double-Precision Float (eight-byte IEEE 754) - return sax->number_float(get_number(), ""); + { + double number; + return get_number(number) and sax->number_float(number, ""); + } default: // anything else (0xFF is handled inside the other types) { - 
std::stringstream ss; - ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - return sax->parse_error(chars_read, ss.str(), parse_error::create(112, chars_read, "error reading CBOR; last byte: 0x" + ss.str())); + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, "error reading CBOR; last byte: 0x" + last_token)); } } } - bool parse_msgpack_sax_internal(json_sax_t* sax) + /*! + @return whether a valid MessagePack value was passed to the SAX parser + */ + bool parse_msgpack_internal() { switch (get()) { // EOF case std::char_traits::eof(): - return sax->parse_error(chars_read, "", parse_error::create(110, chars_read, "unexpected end of input")); + return unexpect_eof(); // positive fixint case 0x00: @@ -6217,7 +6287,7 @@ class binary_reader case 0x8D: case 0x8E: case 0x8F: - return get_msgpack_object_sax(sax, static_cast(current & 0x0F)); + return get_msgpack_object(static_cast(current & 0x0F)); // fixarray case 0x90: @@ -6236,7 +6306,7 @@ class binary_reader case 0x9D: case 0x9E: case 0x9F: - return get_msgpack_array_sax(sax, static_cast(current & 0x0F)); + return get_msgpack_array(static_cast(current & 0x0F)); // fixstr case 0xA0: @@ -6271,7 +6341,10 @@ class binary_reader case 0xBD: case 0xBE: case 0xBF: - return sax->string(get_msgpack_string()); + { + string_t s; + return get_msgpack_string(s) and sax->string(std::move(s)); + } case 0xC0: // nil return sax->null(); @@ -6283,51 +6356,96 @@ class binary_reader return sax->boolean(true); case 0xCA: // float 32 - return sax->number_float(static_cast(get_number()), ""); + { + float number; + return get_number(number) and sax->number_float(static_cast(number), ""); + } case 0xCB: // float 64 - return sax->number_float(get_number(), ""); + { + double number; + return get_number(number) and sax->number_float(number, ""); + } case 0xCC: // uint 8 - return sax->number_unsigned(get_number()); + { + uint8_t number; + 
return get_number(number) and sax->number_unsigned(number); + } case 0xCD: // uint 16 - return sax->number_unsigned(get_number()); + { + uint16_t number; + return get_number(number) and sax->number_unsigned(number); + } case 0xCE: // uint 32 - return sax->number_unsigned(get_number()); + { + uint32_t number; + return get_number(number) and sax->number_unsigned(number); + } case 0xCF: // uint 64 - return sax->number_unsigned(get_number()); + { + uint64_t number; + return get_number(number) and sax->number_unsigned(number); + } case 0xD0: // int 8 - return sax->number_integer(get_number()); + { + int8_t number; + return get_number(number) and sax->number_integer(number); + } case 0xD1: // int 16 - return sax->number_integer(get_number()); + { + int16_t number; + return get_number(number) and sax->number_integer(number); + } case 0xD2: // int 32 - return sax->number_integer(get_number()); + { + int32_t number; + return get_number(number) and sax->number_integer(number); + } case 0xD3: // int 64 - return sax->number_integer(get_number()); + { + int64_t number; + return get_number(number) and sax->number_integer(number); + } case 0xD9: // str 8 case 0xDA: // str 16 case 0xDB: // str 32 - return sax->string(get_msgpack_string()); + { + string_t s; + return get_msgpack_string(s) and sax->string(std::move(s)); + } case 0xDC: // array 16 - return get_msgpack_array_sax(sax, static_cast(get_number())); + { + uint16_t len; + return get_number(len) and get_msgpack_array(static_cast(len)); + } case 0xDD: // array 32 - return get_msgpack_array_sax(sax, static_cast(get_number())); + { + uint32_t len; + return get_number(len) and get_msgpack_array(static_cast(len)); + } case 0xDE: // map 16 - return get_msgpack_object_sax(sax, static_cast(get_number())); + { + uint16_t len; + return get_number(len) and get_msgpack_object(static_cast(len)); + } case 0xDF: // map 32 - return get_msgpack_object_sax(sax, static_cast(get_number())); + { + uint32_t len; + return get_number(len) and 
get_msgpack_object(static_cast(len)); + } // negative fixint case 0xE0: @@ -6366,9 +6484,8 @@ class binary_reader default: // anything else { - std::stringstream ss; - ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - return sax->parse_error(chars_read, ss.str(), parse_error::create(112, chars_read, "error reading MessagePack; last byte: 0x" + ss.str())); + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, "error reading MessagePack; last byte: 0x" + last_token)); } } } @@ -6377,10 +6494,12 @@ class binary_reader @param[in] get_char whether a new character should be retrieved from the input (true, default) or whether the last read character should be considered instead + + @return whether a valid UBJSON value was passed to the SAX parser */ - bool parse_ubjson_sax_internal(json_sax_t* sax, const bool get_char = true) + bool parse_ubjson_internal(const bool get_char = true) { - return get_ubjson_sax_value(sax, get_char ? get_ignore_noop() : current); + return get_ubjson_value(get_char ? get_ignore_noop() : current); } /*! @@ -6416,23 +6535,26 @@ class binary_reader @brief read a number from the input @tparam NumberType the type of the number + @param[out] result number of type @a NumberType - @return number of type @a NumberType + @return whether conversion completed @note This function needs to respect the system's endianess, because - bytes in CBOR and MessagePack are stored in network order (big - endian) and therefore need reordering on little endian systems. - - @throw parse_error.110 if input has less than `sizeof(NumberType)` bytes + bytes in CBOR, MessagePack, and UBJSON are stored in network order + (big endian) and therefore need reordering on little endian systems. 
*/ - template NumberType get_number() + template + bool get_number(NumberType& result) { // step 1: read input into array with system's byte order std::array vec; for (std::size_t i = 0; i < sizeof(NumberType); ++i) { get(); - unexpect_eof(); + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } // reverse byte order prior to conversion if necessary if (is_little_endian) @@ -6446,35 +6568,37 @@ class binary_reader } // step 2: convert array into number of type T and return - NumberType result; std::memcpy(&result, vec.data(), sizeof(NumberType)); - return result; + return true; } /*! @brief create a string by reading characters from the input - @param[in] len number of bytes to read + @tparam NumberType the type of the number + @param[in] len number of characters to read + @param[out] string created by reading @a len bytes + + @return whether string creation completed @note We can not reserve @a len bytes for the result, because @a len may be too large. Usually, @ref unexpect_eof() detects the end of the input before we run out of string memory. - - @return string created by reading @a len bytes - - @throw parse_error.110 if input has less than @a len bytes */ template - string_t get_string(const NumberType len) + bool get_string(const NumberType len, string_t& result) { - string_t result; - std::generate_n(std::back_inserter(result), len, [this]() + bool success = true; + std::generate_n(std::back_inserter(result), len, [this, &success]() { get(); - unexpect_eof(); + if (JSON_UNLIKELY(not unexpect_eof())) + { + success = false; + } return static_cast(current); }); - return result; + return success; } /*! @@ -6484,14 +6608,16 @@ class binary_reader string length and then copies this number of bytes into a string. Additionally, CBOR's strings with indefinite lengths are supported. 
- @return string + @param[out] result created string - @throw parse_error.110 if input ended - @throw parse_error.113 if an unexpected byte is read + @return whether string creation completed */ - string_t get_cbor_string() + bool get_cbor_string(string_t& result) { - unexpect_eof(); + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } switch (current) { @@ -6520,42 +6646,64 @@ class binary_reader case 0x75: case 0x76: case 0x77: - return get_string(current & 0x1F); + { + return get_string(current & 0x1F, result); + } case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - return get_string(get_number()); + { + uint8_t len; + return get_number(len) and get_string(len, result); + } case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - return get_string(get_number()); + { + uint16_t len; + return get_number(len) and get_string(len, result); + } case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) - return get_string(get_number()); + { + uint32_t len; + return get_number(len) and get_string(len, result); + } case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) - return get_string(get_number()); + { + uint64_t len; + return get_number(len) and get_string(len, result); + } case 0x7F: // UTF-8 string (indefinite length) { - string_t result; while (get() != 0xFF) { - result.append(get_cbor_string()); + string_t chunk; + if (not get_cbor_string(chunk)) + { + return false; + } + result.append(chunk); } - return result; + return true; } default: { - std::stringstream ss; - ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(113, chars_read, "expected a CBOR string; last byte: 0x" + ss.str())); + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "expected a CBOR string; last byte: 0x" + last_token)); } } } - bool get_cbor_array_sax(json_sax_t* sax, const std::size_t len) + /*! 
+ @param[in] len the length of the array or json_sax_t::no_limit for an + array of indefinite size + @return whether array creation completed + */ + bool get_cbor_array(const std::size_t len) { - if (not sax->start_array(len)) + if (JSON_UNLIKELY(not sax->start_array(len))) { return false; } @@ -6563,7 +6711,7 @@ class binary_reader if (len != json_sax_t::no_limit) for (std::size_t i = 0; i < len; ++i) { - if (not parse_cbor_sax_internal(sax)) + if (JSON_UNLIKELY(not parse_cbor_internal())) { return false; } @@ -6572,7 +6720,7 @@ class binary_reader { while (get() != 0xFF) { - if (not parse_cbor_sax_internal(sax, false)) + if (JSON_UNLIKELY(not parse_cbor_internal(false))) { return false; } @@ -6582,9 +6730,14 @@ class binary_reader return sax->end_array(); } - bool get_cbor_object_sax(json_sax_t* sax, const std::size_t len) + /*! + @param[in] len the length of the object or json_sax_t::no_limit for an + object of indefinite size + @return whether object creation completed + */ + bool get_cbor_object(const std::size_t len) { - if (not sax->start_object(len)) + if (not JSON_UNLIKELY(sax->start_object(len))) { return false; } @@ -6594,12 +6747,13 @@ class binary_reader for (std::size_t i = 0; i < len; ++i) { get(); - if (not sax->key(get_cbor_string())) + string_t key; + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(std::move(key)))) { return false; } - if (not parse_cbor_sax_internal(sax)) + if (JSON_UNLIKELY(not parse_cbor_internal())) { return false; } @@ -6609,12 +6763,13 @@ class binary_reader { while (get() != 0xFF) { - if (not sax->key(get_cbor_string())) + string_t key; + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(std::move(key)))) { return false; } - if (not parse_cbor_sax_internal(sax)) + if (JSON_UNLIKELY(not parse_cbor_internal())) { return false; } @@ -6630,14 +6785,16 @@ class binary_reader This function first reads starting bytes to determine the expected string length and then copies this number of bytes into a string. 
- @return string + @param[out] result created string - @throw parse_error.110 if input ended - @throw parse_error.113 if an unexpected byte is read + @return whether string creation completed */ - string_t get_msgpack_string() + bool get_msgpack_string(string_t& result) { - unexpect_eof(); + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } switch (current) { @@ -6674,37 +6831,50 @@ class binary_reader case 0xBD: case 0xBE: case 0xBF: - return get_string(current & 0x1F); + { + return get_string(current & 0x1F, result); + } case 0xD9: // str 8 - return get_string(get_number()); + { + uint8_t len; + return get_number(len) and get_string(len, result); + } case 0xDA: // str 16 - return get_string(get_number()); + { + uint16_t len; + return get_number(len) and get_string(len, result); + } case 0xDB: // str 32 - return get_string(get_number()); + { + uint32_t len; + return get_number(len) and get_string(len, result); + } default: { - std::stringstream ss; - ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(113, chars_read, - "expected a MessagePack string; last byte: 0x" + ss.str())); + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "expected a MessagePack string; last byte: 0x" + last_token)); } } } - bool get_msgpack_array_sax(json_sax_t* sax, const std::size_t len) + /*! + @param[in] len the length of the array + @return whether array creation completed + */ + bool get_msgpack_array(const std::size_t len) { - if (not sax->start_array(len)) + if (JSON_UNLIKELY(not sax->start_array(len))) { return false; } for (std::size_t i = 0; i < len; ++i) { - if (not parse_msgpack_sax_internal(sax)) + if (JSON_UNLIKELY(not parse_msgpack_internal())) { return false; } @@ -6713,9 +6883,13 @@ class binary_reader return sax->end_array(); } - bool get_msgpack_object_sax(json_sax_t* sax, const std::size_t len) + /*! 
+ @param[in] len the length of the object + @return whether object creation completed + */ + bool get_msgpack_object(const std::size_t len) { - if (not sax->start_object(len)) + if (JSON_UNLIKELY(not sax->start_object(len))) { return false; } @@ -6723,12 +6897,13 @@ class binary_reader for (std::size_t i = 0; i < len; ++i) { get(); - if (not sax->key(get_msgpack_string())) + string_t key; + if (JSON_UNLIKELY(not get_msgpack_string(key) or not sax->key(std::move(key)))) { return false; } - if (not parse_msgpack_sax_internal(sax)) + if (JSON_UNLIKELY(not parse_msgpack_internal())) { return false; } @@ -6744,60 +6919,131 @@ class binary_reader indicating a string, or in case of an object key where the 'S' byte can be left out. + @param[out] result created string @param[in] get_char whether a new character should be retrieved from the input (true, default) or whether the last read character should be considered instead - @return string - - @throw parse_error.110 if input ended - @throw parse_error.113 if an unexpected byte is read + @return whether string creation completed */ - string_t get_ubjson_string(const bool get_char = true) + bool get_ubjson_string(string_t& result, const bool get_char = true) { if (get_char) { get(); // TODO: may we ignore N here? 
} - unexpect_eof(); + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } switch (current) { case 'U': - return get_string(get_number()); + { + uint8_t len; + return get_number(len) and get_string(len, result); + } + case 'i': - return get_string(get_number()); + { + int8_t len; + return get_number(len) and get_string(len, result); + } + case 'I': - return get_string(get_number()); + { + int16_t len; + return get_number(len) and get_string(len, result); + } + case 'l': - return get_string(get_number()); + { + int32_t len; + return get_number(len) and get_string(len, result); + } + case 'L': - return get_string(get_number()); + { + int64_t len; + return get_number(len) and get_string(len, result); + } + default: - std::stringstream ss; - ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(113, chars_read, - "expected a UBJSON string; last byte: 0x" + ss.str())); + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "expected a UBJSON string; last byte: 0x" + last_token)); } } - std::size_t get_ubjson_size_value() + /*! 
+ @param[out] result determined size + @return whether size determination completed + */ + bool get_ubjson_size_value(std::size_t& result) { switch (get_ignore_noop()) { case 'U': - return static_cast(get_number()); + { + uint8_t number; + if (JSON_UNLIKELY(not get_number(number))) + { + return false; + } + result = static_cast(number); + return true; + } + case 'i': - return static_cast(get_number()); + { + int8_t number; + if (JSON_UNLIKELY(not get_number(number))) + { + return false; + } + result = static_cast(number); + return true; + } + case 'I': - return static_cast(get_number()); + { + int16_t number; + if (JSON_UNLIKELY(not get_number(number))) + { + return false; + } + result = static_cast(number); + return true; + } + case 'l': - return static_cast(get_number()); + { + int32_t number; + if (JSON_UNLIKELY(not get_number(number))) + { + return false; + } + result = static_cast(number); + return true; + } + case 'L': - return static_cast(get_number()); + { + int64_t number; + if (JSON_UNLIKELY(not get_number(number))) + { + return false; + } + result = static_cast(number); + return true; + } + default: - return std::size_t(-1); + { + result = std::size_t(-1); + return true; + } } } @@ -6807,44 +7053,51 @@ class binary_reader In the optimized UBJSON format, a type and a size can be provided to allow for a more compact representation. 
- @return pair of the size and the type + @param[out] result pair of the size and the type + + @return whether pair creation completed */ - std::pair get_ubjson_size_type() + bool get_ubjson_size_type(std::pair& result) { - std::size_t sz = string_t::npos; - int tc = 0; + result.first = string_t::npos; // size + result.second = 0; // type get_ignore_noop(); if (current == '$') { - tc = get(); // must not ignore 'N', because 'N' maybe the type - unexpect_eof(); + result.second = get(); // must not ignore 'N', because 'N' maybe the type + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } get_ignore_noop(); - if (current != '#') + if (JSON_UNLIKELY(current != '#')) { - std::stringstream ss; - ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(112, chars_read, - "expected '#' after UBJSON type information; last byte: 0x" + ss.str())); + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, "expected '#' after UBJSON type information; last byte: 0x" + last_token)); } - sz = get_ubjson_size_value(); + + return get_ubjson_size_value(result.first); } else if (current == '#') { - sz = get_ubjson_size_value(); + return get_ubjson_size_value(result.first); } - - return std::make_pair(sz, tc); + return true; } - bool get_ubjson_sax_value(json_sax_t* sax, const int prefix) + /*! 
+ @param prefix the previously read or set type prefix + @return whether value creation completed + */ + bool get_ubjson_value(const int prefix) { switch (prefix) { case std::char_traits::eof(): // EOF - return sax->parse_error(chars_read, "", parse_error::create(110, chars_read, "unexpected end of input")); + return unexpect_eof(); case 'T': // true return sax->boolean(true); @@ -6855,57 +7108,96 @@ class binary_reader return sax->null(); case 'U': - return sax->number_unsigned(get_number()); + { + uint8_t number; + return get_number(number) and sax->number_unsigned(number); + } + case 'i': - return sax->number_integer(get_number()); + { + int8_t number; + return get_number(number) and sax->number_integer(number); + } + case 'I': - return sax->number_integer(get_number()); + { + int16_t number; + return get_number(number) and sax->number_integer(number); + } + case 'l': - return sax->number_integer(get_number()); + { + int32_t number; + return get_number(number) and sax->number_integer(number); + } + case 'L': - return sax->number_integer(get_number()); + { + int64_t number; + return get_number(number) and sax->number_integer(number); + } + case 'd': - return sax->number_float(static_cast(get_number()), ""); + { + float number; + return get_number(number) and sax->number_float(static_cast(number), ""); + } + case 'D': - return sax->number_float(get_number(), ""); + { + double number; + return get_number(number) and sax->number_float(number, ""); + } case 'C': // char { get(); - unexpect_eof(); + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } if (JSON_UNLIKELY(current > 127)) { - std::stringstream ss; - ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - return sax->parse_error(chars_read, ss.str(), parse_error::create(113, chars_read, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + ss.str())); + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, 
parse_error::create(113, chars_read, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token)); } return sax->string(string_t(1, static_cast(current))); } case 'S': // string - return sax->string(get_ubjson_string()); + { + string_t s; + return get_ubjson_string(s) and sax->string(std::move(s)); + } case '[': // array - return get_ubjson_sax_array(sax); + return get_ubjson_array(); case '{': // object - return get_ubjson_sax_object(sax); + return get_ubjson_object(); default: // anything else - std::stringstream ss; - ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(112, chars_read, - "error reading UBJSON; last byte: 0x" + ss.str())); + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, "error reading UBJSON; last byte: 0x" + last_token)); + } } } - bool get_ubjson_sax_array(json_sax_t* sax) + /*! + @return whether array creation completed + */ + bool get_ubjson_array() { - const auto size_and_type = get_ubjson_size_type(); + std::pair size_and_type; + if (JSON_UNLIKELY(not get_ubjson_size_type(size_and_type))) + { + return false; + } if (size_and_type.first != string_t::npos) { - if (not sax->start_array(size_and_type.first)) + if (JSON_UNLIKELY(not sax->start_array(size_and_type.first))) { return false; } @@ -6916,7 +7208,7 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (not get_ubjson_sax_value(sax, size_and_type.second)) + if (JSON_UNLIKELY(not get_ubjson_value(size_and_type.second))) { return false; } @@ -6927,7 +7219,7 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (not parse_ubjson_sax_internal(sax)) + if (JSON_UNLIKELY(not parse_ubjson_internal())) { return false; } @@ -6936,14 +7228,14 @@ class binary_reader } else { - if (not sax->start_array()) + if (JSON_UNLIKELY(not sax->start_array())) { return false; } while 
(current != ']') { - if (not parse_ubjson_sax_internal(sax, false)) + if (JSON_UNLIKELY(not parse_ubjson_internal(false))) { return false; } @@ -6954,13 +7246,20 @@ class binary_reader return sax->end_array(); } - bool get_ubjson_sax_object(json_sax_t* sax) + /*! + @return whether object creation completed + */ + bool get_ubjson_object() { - const auto size_and_type = get_ubjson_size_type(); + std::pair size_and_type; + if (JSON_UNLIKELY(not get_ubjson_size_type(size_and_type))) + { + return false; + } if (size_and_type.first != string_t::npos) { - if (not sax->start_object(size_and_type.first)) + if (JSON_UNLIKELY(not sax->start_object(size_and_type.first))) { return false; } @@ -6969,11 +7268,12 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (not sax->key(get_ubjson_string())) + string_t key; + if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(std::move(key)))) { return false; } - if (not get_ubjson_sax_value(sax, size_and_type.second)) + if (JSON_UNLIKELY(not get_ubjson_value(size_and_type.second))) { return false; } @@ -6983,11 +7283,12 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (not sax->key(get_ubjson_string())) + string_t key; + if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(std::move(key)))) { return false; } - if (not parse_ubjson_sax_internal(sax)) + if (JSON_UNLIKELY(not parse_ubjson_internal())) { return false; } @@ -6996,18 +7297,19 @@ class binary_reader } else { - if (not sax->start_object()) + if (JSON_UNLIKELY(not sax->start_object())) { return false; } while (current != '}') { - if (not sax->key(get_ubjson_string(false))) + string_t key; + if (JSON_UNLIKELY(not get_ubjson_string(key, false) or not sax->key(std::move(key)))) { return false; } - if (not parse_ubjson_sax_internal(sax)) + if (JSON_UNLIKELY(not parse_ubjson_internal())) { return false; } @@ -7019,27 +7321,37 @@ class binary_reader } /*! 
- @brief throw if end of input is not reached - @throw parse_error.110 if input not ended + @return whether input was completely read */ - void expect_eof() const + bool expect_eof() const { if (JSON_UNLIKELY(current != std::char_traits::eof())) { - JSON_THROW(parse_error::create(110, chars_read, "expected end of input")); + return sax->parse_error(chars_read, get_token_string(), parse_error::create(110, chars_read, "expected end of input")); } + return true; } /*! - @briefthrow if end of input is reached - @throw parse_error.110 if input ended + @return whether the last read character is not EOF */ - void unexpect_eof() const + bool unexpect_eof() const { if (JSON_UNLIKELY(current == std::char_traits::eof())) { - JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + return sax->parse_error(chars_read, "", parse_error::create(110, chars_read, "unexpected end of input")); } + return true; + } + + /*! + @return a string representation of the last read byte + */ + std::string get_token_string() const + { + std::stringstream ss; + ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; + return ss.str(); } private: @@ -7054,6 +7366,9 @@ class binary_reader /// whether we can assume little endianess const bool is_little_endian = little_endianess(); + + /// the SAX parser + json_sax_t* sax = nullptr; }; } } From 9e07e9b4ecc05a51cc559e3b871d633336f2c896 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 19 Mar 2018 22:48:13 +0100 Subject: [PATCH 27/43] :sparkles: implemented non-throwing binary reader --- .../nlohmann/detail/input/binary_reader.hpp | 110 ++++------- include/nlohmann/json.hpp | 62 +++++-- single_include/nlohmann/json.hpp | 171 +++++++++--------- test/src/unit-cbor.cpp | 79 ++++++++ 4 files changed, 252 insertions(+), 170 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index f5e1cc34..429502ea 100644 --- 
a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -41,6 +41,9 @@ class binary_reader using json_sax_t = json_sax; public: + /// the supported binary input formats + enum class binary_format_t { cbor, msgpack, ubjson }; + /*! @brief create a binary reader @@ -52,77 +55,50 @@ class binary_reader } /*! - @brief create a JSON value from CBOR input - + @param[in] format the binary format to parse + @param[in] sax_ a SAX event processor @param[in] strict whether to expect the input to be consumed completed - @return JSON value created from CBOR input - @throw parse_error.110 if input ended unexpectedly or the end of file was - not reached when @a strict was set to true - @throw parse_error.112 if unsupported byte was read + @return */ - BasicJsonType parse_cbor(const bool strict) + bool sax_parse(const binary_format_t format, json_sax_t* sax_, const bool strict) { - BasicJsonType result; - json_sax_dom_parser sdp(result); - sax = &sdp; - parse_cbor_internal(); - result.assert_invariant(); - if (strict) + sax = sax_; + bool result; + + switch (format) { - get(); - expect_eof(); + case binary_format_t::cbor: + result = parse_cbor_internal(); + break; + + case binary_format_t::msgpack: + result = parse_msgpack_internal(); + break; + + case binary_format_t::ubjson: + result = parse_ubjson_internal(); + break; } - return result; - } - /*! 
- @brief create a JSON value from MessagePack input - - @param[in] strict whether to expect the input to be consumed completed - @return JSON value created from MessagePack input - - @throw parse_error.110 if input ended unexpectedly or the end of file was - not reached when @a strict was set to true - @throw parse_error.112 if unsupported byte was read - */ - BasicJsonType parse_msgpack(const bool strict) - { - BasicJsonType result; - json_sax_dom_parser sdp(result); - sax = &sdp; - parse_msgpack_internal(); - result.assert_invariant(); - if (strict) + // strict mode: next byte must be EOF + if (result and strict) { - get(); - expect_eof(); + if (format == binary_format_t::ubjson) + { + get_ignore_noop(); + } + else + { + get(); + } + + if (JSON_UNLIKELY(current != std::char_traits::eof())) + { + return sax->parse_error(chars_read, get_token_string(), parse_error::create(110, chars_read, "expected end of input")); + } } - return result; - } - /*! - @brief create a JSON value from UBJSON input - - @param[in] strict whether to expect the input to be consumed completed - @return JSON value created from UBJSON input - - @throw parse_error.110 if input ended unexpectedly or the end of file was - not reached when @a strict was set to true - @throw parse_error.112 if unsupported byte was read - */ - BasicJsonType parse_ubjson(const bool strict) - { - BasicJsonType result; - json_sax_dom_parser sdp(result); - sax = &sdp; - parse_ubjson_internal(); - result.assert_invariant(); - if (strict) - { - get_ignore_noop(); - expect_eof(); - } return result; } @@ -1662,18 +1638,6 @@ class binary_reader return sax->end_object(); } - /*! - @return whether input was completely read - */ - bool expect_eof() const - { - if (JSON_UNLIKELY(current != std::char_traits::eof())) - { - return sax->parse_error(chars_read, get_token_string(), parse_error::create(110, chars_read, "expected end of input")); - } - return true; - } - /*! 
@return whether the last read character is not EOF */ diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index bdc905f1..393fa2ea 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -6611,6 +6611,9 @@ class basic_json @param[in] i an input in CBOR format convertible to an input adapter @param[in] strict whether to expect the input to be consumed until EOF (true by default) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + @return deserialized JSON value @throw parse_error.110 if the given input ends prematurely or the end of @@ -6636,9 +6639,13 @@ class basic_json @a strict parameter since 3.0.0 */ static basic_json from_cbor(detail::input_adapter i, - const bool strict = true) + const bool strict = true, + const bool allow_exceptions = true) { - return binary_reader(i).parse_cbor(strict); + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(binary_reader::binary_format_t::cbor, &sdp, strict); + return res ? result : basic_json(value_t::discarded); } /*! @@ -6646,9 +6653,14 @@ class basic_json */ template::value, int> = 0> - static basic_json from_cbor(A1 && a1, A2 && a2, const bool strict = true) + static basic_json from_cbor(A1 && a1, A2 && a2, + const bool strict = true, + const bool allow_exceptions = true) { - return binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).parse_cbor(strict); + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(binary_reader::binary_format_t::cbor, &sdp, strict); + return res ? result : basic_json(value_t::discarded); } /*! 
@@ -6701,6 +6713,10 @@ class basic_json adapter @param[in] strict whether to expect the input to be consumed until EOF (true by default) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + + @return deserialized JSON value @throw parse_error.110 if the given input ends prematurely or the end of file was not reached when @a strict was set to true @@ -6725,9 +6741,13 @@ class basic_json @a strict parameter since 3.0.0 */ static basic_json from_msgpack(detail::input_adapter i, - const bool strict = true) + const bool strict = true, + const bool allow_exceptions = true) { - return binary_reader(i).parse_msgpack(strict); + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(binary_reader::binary_format_t::msgpack, &sdp, strict); + return res ? result : basic_json(value_t::discarded); } /*! @@ -6735,9 +6755,14 @@ class basic_json */ template::value, int> = 0> - static basic_json from_msgpack(A1 && a1, A2 && a2, const bool strict = true) + static basic_json from_msgpack(A1 && a1, A2 && a2, + const bool strict = true, + const bool allow_exceptions = true) { - return binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).parse_msgpack(strict); + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(binary_reader::binary_format_t::msgpack, &sdp, strict); + return res ? result : basic_json(value_t::discarded); } /*! 
@@ -6772,6 +6797,10 @@ class basic_json @param[in] i an input in UBJSON format convertible to an input adapter @param[in] strict whether to expect the input to be consumed until EOF (true by default) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + + @return deserialized JSON value @throw parse_error.110 if the given input ends prematurely or the end of file was not reached when @a strict was set to true @@ -6794,16 +6823,25 @@ class basic_json @since version 3.1.0 */ static basic_json from_ubjson(detail::input_adapter i, - const bool strict = true) + const bool strict = true, + const bool allow_exceptions = true) { - return binary_reader(i).parse_ubjson(strict); + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(binary_reader::binary_format_t::ubjson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); } template::value, int> = 0> - static basic_json from_ubjson(A1 && a1, A2 && a2, const bool strict = true) + static basic_json from_ubjson(A1 && a1, A2 && a2, + const bool strict = true, + const bool allow_exceptions = true) { - return binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).parse_ubjson(strict); + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(binary_reader::binary_format_t::ubjson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); } /// @} diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 4d8ec2bc..ea4f1995 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -5699,6 +5699,8 @@ class binary_reader using json_sax_t = json_sax; public: + enum class binary_format_t { cbor, msgpack, ubjson }; + /*! 
@brief create a binary reader @@ -5710,77 +5712,50 @@ class binary_reader } /*! - @brief create a JSON value from CBOR input - + @param[in] format the binary format to parse + @param[in] sax_ a SAX event processor @param[in] strict whether to expect the input to be consumed completed - @return JSON value created from CBOR input - @throw parse_error.110 if input ended unexpectedly or the end of file was - not reached when @a strict was set to true - @throw parse_error.112 if unsupported byte was read + @return */ - BasicJsonType parse_cbor(const bool strict) + bool sax_parse(const binary_format_t format, json_sax_t* sax_, const bool strict) { - BasicJsonType result; - json_sax_dom_parser sdp(result); - sax = &sdp; - parse_cbor_internal(); - result.assert_invariant(); - if (strict) + sax = sax_; + bool result; + + switch (format) { - get(); - expect_eof(); + case binary_format_t::cbor: + result = parse_cbor_internal(); + break; + + case binary_format_t::msgpack: + result = parse_msgpack_internal(); + break; + + case binary_format_t::ubjson: + result = parse_ubjson_internal(); + break; } - return result; - } - /*! 
- @brief create a JSON value from MessagePack input - - @param[in] strict whether to expect the input to be consumed completed - @return JSON value created from MessagePack input - - @throw parse_error.110 if input ended unexpectedly or the end of file was - not reached when @a strict was set to true - @throw parse_error.112 if unsupported byte was read - */ - BasicJsonType parse_msgpack(const bool strict) - { - BasicJsonType result; - json_sax_dom_parser sdp(result); - sax = &sdp; - parse_msgpack_internal(); - result.assert_invariant(); - if (strict) + // strict mode: next byte must be EOF + if (result and strict) { - get(); - expect_eof(); + if (format == binary_format_t::ubjson) + { + get_ignore_noop(); + } + else + { + get(); + } + + if (JSON_UNLIKELY(current != std::char_traits::eof())) + { + return sax->parse_error(chars_read, get_token_string(), parse_error::create(110, chars_read, "expected end of input")); + } } - return result; - } - /*! - @brief create a JSON value from UBJSON input - - @param[in] strict whether to expect the input to be consumed completed - @return JSON value created from UBJSON input - - @throw parse_error.110 if input ended unexpectedly or the end of file was - not reached when @a strict was set to true - @throw parse_error.112 if unsupported byte was read - */ - BasicJsonType parse_ubjson(const bool strict) - { - BasicJsonType result; - json_sax_dom_parser sdp(result); - sax = &sdp; - parse_ubjson_internal(); - result.assert_invariant(); - if (strict) - { - get_ignore_noop(); - expect_eof(); - } return result; } @@ -7320,18 +7295,6 @@ class binary_reader return sax->end_object(); } - /*! - @return whether input was completely read - */ - bool expect_eof() const - { - if (JSON_UNLIKELY(current != std::char_traits::eof())) - { - return sax->parse_error(chars_read, get_token_string(), parse_error::create(110, chars_read, "expected end of input")); - } - return true; - } - /*! 
@return whether the last read character is not EOF */ @@ -17392,6 +17355,9 @@ class basic_json @param[in] i an input in CBOR format convertible to an input adapter @param[in] strict whether to expect the input to be consumed until EOF (true by default) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + @return deserialized JSON value @throw parse_error.110 if the given input ends prematurely or the end of @@ -17417,9 +17383,13 @@ class basic_json @a strict parameter since 3.0.0 */ static basic_json from_cbor(detail::input_adapter i, - const bool strict = true) + const bool strict = true, + const bool allow_exceptions = true) { - return binary_reader(i).parse_cbor(strict); + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(binary_reader::binary_format_t::cbor, &sdp, strict); + return res ? result : basic_json(value_t::discarded); } /*! @@ -17427,9 +17397,14 @@ class basic_json */ template::value, int> = 0> - static basic_json from_cbor(A1 && a1, A2 && a2, const bool strict = true) + static basic_json from_cbor(A1 && a1, A2 && a2, + const bool strict = true, + const bool allow_exceptions = true) { - return binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).parse_cbor(strict); + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(binary_reader::binary_format_t::cbor, &sdp, strict); + return res ? result : basic_json(value_t::discarded); } /*! 
@@ -17482,6 +17457,10 @@ class basic_json adapter @param[in] strict whether to expect the input to be consumed until EOF (true by default) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + + @return deserialized JSON value @throw parse_error.110 if the given input ends prematurely or the end of file was not reached when @a strict was set to true @@ -17506,9 +17485,13 @@ class basic_json @a strict parameter since 3.0.0 */ static basic_json from_msgpack(detail::input_adapter i, - const bool strict = true) + const bool strict = true, + const bool allow_exceptions = true) { - return binary_reader(i).parse_msgpack(strict); + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(binary_reader::binary_format_t::msgpack, &sdp, strict); + return res ? result : basic_json(value_t::discarded); } /*! @@ -17516,9 +17499,14 @@ class basic_json */ template::value, int> = 0> - static basic_json from_msgpack(A1 && a1, A2 && a2, const bool strict = true) + static basic_json from_msgpack(A1 && a1, A2 && a2, + const bool strict = true, + const bool allow_exceptions = true) { - return binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).parse_msgpack(strict); + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(binary_reader::binary_format_t::msgpack, &sdp, strict); + return res ? result : basic_json(value_t::discarded); } /*! 
@@ -17553,6 +17541,10 @@ class basic_json @param[in] i an input in UBJSON format convertible to an input adapter @param[in] strict whether to expect the input to be consumed until EOF (true by default) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + + @return deserialized JSON value @throw parse_error.110 if the given input ends prematurely or the end of file was not reached when @a strict was set to true @@ -17575,16 +17567,25 @@ class basic_json @since version 3.1.0 */ static basic_json from_ubjson(detail::input_adapter i, - const bool strict = true) + const bool strict = true, + const bool allow_exceptions = true) { - return binary_reader(i).parse_ubjson(strict); + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(binary_reader::binary_format_t::ubjson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); } template::value, int> = 0> - static basic_json from_ubjson(A1 && a1, A2 && a2, const bool strict = true) + static basic_json from_ubjson(A1 && a1, A2 && a2, + const bool strict = true, + const bool allow_exceptions = true) { - return binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).parse_ubjson(strict); + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(binary_reader::binary_format_t::ubjson, &sdp, strict); + return res ? 
result : basic_json(value_t::discarded); } /// @} diff --git a/test/src/unit-cbor.cpp b/test/src/unit-cbor.cpp index 6b9eac52..e8b8c6f4 100644 --- a/test/src/unit-cbor.cpp +++ b/test/src/unit-cbor.cpp @@ -54,6 +54,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } SECTION("boolean") @@ -67,6 +68,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } SECTION("false") @@ -78,6 +80,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } @@ -142,6 +145,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } @@ -190,6 +194,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } @@ -225,6 +230,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } @@ -241,6 +247,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } SECTION("-256..-24") @@ -271,6 +278,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } @@ -300,6 +308,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } @@ -330,6 +339,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } @@ -362,6 +372,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } @@ -396,6 +407,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } @@ -438,6 +450,7 @@ 
TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } @@ -488,6 +501,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } @@ -555,6 +569,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } @@ -587,6 +602,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } @@ -620,6 +636,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } @@ -661,6 +678,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } @@ -710,6 +728,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } } @@ -730,6 +749,8 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); CHECK(json::from_cbor(result) == v); + + CHECK(json::from_cbor(result, true, false) == j); } } @@ -742,12 +763,14 @@ TEST_CASE("CBOR") CHECK_THROWS_AS(json::from_cbor(std::vector({0xf9})), json::parse_error&); CHECK_THROWS_WITH(json::from_cbor(std::vector({0xf9})), "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); + CHECK(json::from_cbor(std::vector({0xf9}), true, false).is_discarded()); } SECTION("only one byte follows") { CHECK_THROWS_AS(json::from_cbor(std::vector({0xf9, 0x7c})), json::parse_error&); CHECK_THROWS_WITH(json::from_cbor(std::vector({0xf9, 0x7c})), "[json.exception.parse_error.110] parse error at 3: unexpected end of input"); + CHECK(json::from_cbor(std::vector({0xf9, 0x7c}), true, false).is_discarded()); } } @@ -868,6 +891,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } @@ -899,6 
+923,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } @@ -931,6 +956,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } @@ -965,6 +991,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } } @@ -980,6 +1007,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } SECTION("[null]") @@ -991,6 +1019,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } SECTION("[1,2,3,4,5]") @@ -1002,6 +1031,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } SECTION("[[[[]]]]") @@ -1013,6 +1043,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } SECTION("array with uint16_t elements") @@ -1027,6 +1058,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } SECTION("array with uint32_t elements") @@ -1043,6 +1075,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } /* @@ -1079,6 +1112,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } SECTION("{\"\":null}") @@ -1090,6 +1124,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } SECTION("{\"a\": {\"b\": {\"c\": {}}}}") @@ -1104,6 +1139,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } SECTION("object with uint8_t elements") @@ -1130,6 +1166,7 @@ 
TEST_CASE("CBOR") CHECK(result[1] == 0xff); // size byte (0xff) // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } SECTION("object with uint16_t elements") @@ -1158,6 +1195,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } SECTION("object with uint32_t elements") @@ -1188,6 +1226,7 @@ TEST_CASE("CBOR") // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); } } } @@ -1229,6 +1268,7 @@ TEST_CASE("CBOR") CHECK_THROWS_AS(json::from_cbor(std::vector()), json::parse_error&); CHECK_THROWS_WITH(json::from_cbor(std::vector()), "[json.exception.parse_error.110] parse error at 1: unexpected end of input"); + CHECK(json::from_cbor(std::vector(), true, false).is_discarded()); } SECTION("too short byte vector") @@ -1248,6 +1288,10 @@ TEST_CASE("CBOR") CHECK_THROWS_AS(json::from_cbor(std::vector({0x1b, 0x00, 0x00, 0x00, 0x00, 0x00})), json::parse_error&); CHECK_THROWS_AS(json::from_cbor(std::vector({0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})), json::parse_error&); CHECK_THROWS_AS(json::from_cbor(std::vector({0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})), json::parse_error&); + CHECK_THROWS_AS(json::from_cbor(std::vector({0x62})), json::parse_error&); + CHECK_THROWS_AS(json::from_cbor(std::vector({0x62, 0x60})), json::parse_error&); + CHECK_THROWS_AS(json::from_cbor(std::vector({0x7F})), json::parse_error&); + CHECK_THROWS_AS(json::from_cbor(std::vector({0x7F, 0x60})), json::parse_error&); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x18})), "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); @@ -1279,6 +1323,34 @@ TEST_CASE("CBOR") "[json.exception.parse_error.110] parse error at 8: unexpected end of input"); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})), "[json.exception.parse_error.110] parse error at 9: unexpected end 
of input"); + CHECK_THROWS_WITH(json::from_cbor(std::vector({0x62})), + "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); + CHECK_THROWS_WITH(json::from_cbor(std::vector({0x62, 0x60})), + "[json.exception.parse_error.110] parse error at 3: unexpected end of input"); + CHECK_THROWS_WITH(json::from_cbor(std::vector({0x7F})), + "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); + CHECK_THROWS_WITH(json::from_cbor(std::vector({0x7F, 0x60})), + "[json.exception.parse_error.110] parse error at 3: unexpected end of input"); + + CHECK(json::from_cbor(std::vector({0x18}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x19}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x19, 0x00}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x1a}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x1a, 0x00}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x1a, 0x00, 0x00}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x1a, 0x00, 0x00, 0x00}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x1b}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x1b, 0x00}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x1b, 0x00, 0x00}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x1b, 0x00, 0x00, 0x00}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x1b, 0x00, 0x00, 0x00, 0x00}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x1b, 0x00, 0x00, 0x00, 0x00, 0x00}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x62}), true, false).is_discarded()); + 
CHECK(json::from_cbor(std::vector({0x62, 0x60}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x7F}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x7F, 0x60}), true, false).is_discarded()); } SECTION("unsupported bytes") @@ -1288,9 +1360,12 @@ TEST_CASE("CBOR") CHECK_THROWS_AS(json::from_cbor(std::vector({0x1c})), json::parse_error&); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x1c})), "[json.exception.parse_error.112] parse error at 1: error reading CBOR; last byte: 0x1C"); + CHECK(json::from_cbor(std::vector({0x1c}), true, false).is_discarded()); + CHECK_THROWS_AS(json::from_cbor(std::vector({0xf8})), json::parse_error&); CHECK_THROWS_WITH(json::from_cbor(std::vector({0xf8})), "[json.exception.parse_error.112] parse error at 1: error reading CBOR; last byte: 0xF8"); + CHECK(json::from_cbor(std::vector({0xf8}), true, false).is_discarded()); } SECTION("all unsupported bytes") @@ -1340,6 +1415,7 @@ TEST_CASE("CBOR") }) { CHECK_THROWS_AS(json::from_cbor(std::vector({static_cast(byte)})), json::parse_error&); + CHECK(json::from_cbor(std::vector({static_cast(byte)}), true, false).is_discarded()); } } } @@ -1349,6 +1425,7 @@ TEST_CASE("CBOR") CHECK_THROWS_AS(json::from_cbor(std::vector({0xa1, 0xff, 0x01})), json::parse_error&); CHECK_THROWS_WITH(json::from_cbor(std::vector({0xa1, 0xff, 0x01})), "[json.exception.parse_error.113] parse error at 2: expected a CBOR string; last byte: 0xFF"); + CHECK(json::from_cbor(std::vector({0xa1, 0xff, 0x01}), true, false).is_discarded()); } SECTION("strict mode") @@ -1358,6 +1435,7 @@ TEST_CASE("CBOR") { const auto result = json::from_cbor(vec, false); CHECK(result == json()); + CHECK(not json::from_cbor(vec, false, false).is_discarded()); } SECTION("strict mode") @@ -1365,6 +1443,7 @@ TEST_CASE("CBOR") CHECK_THROWS_AS(json::from_cbor(vec), json::parse_error&); CHECK_THROWS_WITH(json::from_cbor(vec), "[json.exception.parse_error.110] parse error at 2: expected end of input"); + 
CHECK(json::from_cbor(vec, true, false).is_discarded()); } } } From 99ecca55c459973432e560159260d51f9f6eb950 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 20 Mar 2018 18:49:10 +0100 Subject: [PATCH 28/43] :white_check_mark: improved test coverage --- Makefile | 2 +- .../nlohmann/detail/input/binary_reader.hpp | 8 +- single_include/nlohmann/json.hpp | 9 +- test/src/unit-cbor.cpp | 13 ++ test/src/unit-msgpack.cpp | 70 +++++++ test/src/unit-ubjson.cpp | 172 ++++++++++++++++++ 6 files changed, 269 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index ed0a7a5c..1e121788 100644 --- a/Makefile +++ b/Makefile @@ -85,7 +85,7 @@ coverage: mkdir build_coverage cd build_coverage ; CXX=g++-5 cmake .. -GNinja -DJSON_Coverage=ON -DJSON_MultipleHeaders=ON cd build_coverage ; ninja - cd build_coverage ; ctest -j10 + cd build_coverage ; ctest -E '.*_default' -j10 cd build_coverage ; ninja lcov_html open build_coverage/test/html/index.html diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 429502ea..d3c09bc9 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -1359,8 +1359,8 @@ class binary_reader default: { - result = std::size_t(-1); - return true; + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "byte after '#' must denote a number type; last byte: 0x" + last_token)); } } } @@ -1393,6 +1393,10 @@ class binary_reader get_ignore_noop(); if (JSON_UNLIKELY(current != '#')) { + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } auto last_token = get_token_string(); return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, "expected '#' after UBJSON type information; last byte: 0x" + last_token)); } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index ea4f1995..57eba455 100644 --- 
a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -5699,6 +5699,7 @@ class binary_reader using json_sax_t = json_sax; public: + /// the supported binary input formats enum class binary_format_t { cbor, msgpack, ubjson }; /*! @@ -7016,8 +7017,8 @@ class binary_reader default: { - result = std::size_t(-1); - return true; + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "byte after '#' must denote a number type; last byte: 0x" + last_token)); } } } @@ -7050,6 +7051,10 @@ class binary_reader get_ignore_noop(); if (JSON_UNLIKELY(current != '#')) { + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } auto last_token = get_token_string(); return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, "expected '#' after UBJSON type information; last byte: 0x" + last_token)); } diff --git a/test/src/unit-cbor.cpp b/test/src/unit-cbor.cpp index e8b8c6f4..b0efe6bc 100644 --- a/test/src/unit-cbor.cpp +++ b/test/src/unit-cbor.cpp @@ -1292,6 +1292,10 @@ TEST_CASE("CBOR") CHECK_THROWS_AS(json::from_cbor(std::vector({0x62, 0x60})), json::parse_error&); CHECK_THROWS_AS(json::from_cbor(std::vector({0x7F})), json::parse_error&); CHECK_THROWS_AS(json::from_cbor(std::vector({0x7F, 0x60})), json::parse_error&); + CHECK_THROWS_AS(json::from_cbor(std::vector({0x82, 0x01})), json::parse_error&); + CHECK_THROWS_AS(json::from_cbor(std::vector({0x9F, 0x01})), json::parse_error&); + CHECK_THROWS_AS(json::from_cbor(std::vector({0xBF, 0x61, 0x61, 0xF5})), json::parse_error&); + CHECK_THROWS_WITH(json::from_cbor(std::vector({0x18})), "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); @@ -1331,6 +1335,12 @@ TEST_CASE("CBOR") "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x7F, 0x60})), "[json.exception.parse_error.110] parse error at 3: unexpected end of 
input"); + CHECK_THROWS_WITH(json::from_cbor(std::vector({0x82, 0x01})), + "[json.exception.parse_error.110] parse error at 3: unexpected end of input"); + CHECK_THROWS_WITH(json::from_cbor(std::vector({0x9F, 0x01})), + "[json.exception.parse_error.110] parse error at 3: unexpected end of input"); + CHECK_THROWS_WITH(json::from_cbor(std::vector({0xBF, 0x61, 0x61, 0xF5})), + "[json.exception.parse_error.110] parse error at 5: unexpected end of input"); CHECK(json::from_cbor(std::vector({0x18}), true, false).is_discarded()); CHECK(json::from_cbor(std::vector({0x19}), true, false).is_discarded()); @@ -1351,6 +1361,9 @@ TEST_CASE("CBOR") CHECK(json::from_cbor(std::vector({0x62, 0x60}), true, false).is_discarded()); CHECK(json::from_cbor(std::vector({0x7F}), true, false).is_discarded()); CHECK(json::from_cbor(std::vector({0x7F, 0x60}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x82, 0x01}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0x9F, 0x01}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0xBF, 0x61, 0x61, 0xF5}), true, false).is_discarded()); } SECTION("unsupported bytes") diff --git a/test/src/unit-msgpack.cpp b/test/src/unit-msgpack.cpp index d8bdb08b..9cb3d18b 100644 --- a/test/src/unit-msgpack.cpp +++ b/test/src/unit-msgpack.cpp @@ -54,6 +54,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } SECTION("boolean") @@ -67,6 +68,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } SECTION("false") @@ -78,6 +80,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } @@ -111,6 +114,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } @@ -141,6 +145,7 @@ 
TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } @@ -174,6 +179,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } @@ -208,6 +214,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } @@ -250,6 +257,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } @@ -300,6 +308,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } @@ -331,6 +340,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } @@ -347,6 +357,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } SECTION("-32768..-129 (int 16)") @@ -379,6 +390,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } @@ -423,6 +435,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } @@ -472,6 +485,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } } @@ -504,6 +518,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } @@ -536,6 +551,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } @@ -569,6 +585,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + 
CHECK(json::from_msgpack(result, true, false) == j); } } @@ -610,6 +627,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } @@ -659,6 +677,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } } @@ -679,6 +698,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); CHECK(json::from_msgpack(result) == v); + CHECK(json::from_msgpack(result, true, false) == j); } } } @@ -727,6 +747,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } @@ -758,6 +779,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } @@ -790,6 +812,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } @@ -824,6 +847,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } } @@ -839,6 +863,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } SECTION("[null]") @@ -850,6 +875,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } SECTION("[1,2,3,4,5]") @@ -861,6 +887,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } SECTION("[[[[]]]]") @@ -872,6 +899,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } SECTION("array 16") @@ -886,6 +914,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == 
j); + CHECK(json::from_msgpack(result, true, false) == j); } SECTION("array 32") @@ -909,6 +938,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } @@ -923,6 +953,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } SECTION("{\"\":null}") @@ -934,6 +965,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } SECTION("{\"a\": {\"b\": {\"c\": {}}}}") @@ -948,6 +980,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } SECTION("map 16") @@ -971,6 +1004,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } SECTION("map 32") @@ -1001,6 +1035,7 @@ TEST_CASE("MessagePack") // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); } } } @@ -1019,10 +1054,12 @@ TEST_CASE("MessagePack") CHECK_THROWS_AS(json::from_msgpack(std::vector()), json::parse_error&); CHECK_THROWS_WITH(json::from_msgpack(std::vector()), "[json.exception.parse_error.110] parse error at 1: unexpected end of input"); + CHECK(json::from_msgpack(std::vector(), true, false).is_discarded()); } SECTION("too short byte vector") { + CHECK_THROWS_AS(json::from_msgpack(std::vector({0x87})), json::parse_error&); CHECK_THROWS_AS(json::from_msgpack(std::vector({0xcc})), json::parse_error&); CHECK_THROWS_AS(json::from_msgpack(std::vector({0xcd})), json::parse_error&); CHECK_THROWS_AS(json::from_msgpack(std::vector({0xcd, 0x00})), json::parse_error&); @@ -1038,7 +1075,11 @@ TEST_CASE("MessagePack") CHECK_THROWS_AS(json::from_msgpack(std::vector({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00})), json::parse_error&); 
CHECK_THROWS_AS(json::from_msgpack(std::vector({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})), json::parse_error&); CHECK_THROWS_AS(json::from_msgpack(std::vector({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})), json::parse_error&); + CHECK_THROWS_AS(json::from_msgpack(std::vector({0xa5, 0x68, 0x65})), json::parse_error&); + CHECK_THROWS_AS(json::from_msgpack(std::vector({0x92, 0x01})), json::parse_error&); + CHECK_THROWS_WITH(json::from_msgpack(std::vector({0x87})), + "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xcc})), "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xcd})), @@ -1069,6 +1110,29 @@ TEST_CASE("MessagePack") "[json.exception.parse_error.110] parse error at 8: unexpected end of input"); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})), "[json.exception.parse_error.110] parse error at 9: unexpected end of input"); + CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xa5, 0x68, 0x65})), + "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); + CHECK_THROWS_WITH(json::from_msgpack(std::vector({0x92, 0x01})), + "[json.exception.parse_error.110] parse error at 3: unexpected end of input"); + + CHECK(json::from_msgpack(std::vector({0x87}), true, false).is_discarded()); + CHECK(json::from_msgpack(std::vector({0xcc}), true, false).is_discarded()); + CHECK(json::from_msgpack(std::vector({0xcd}), true, false).is_discarded()); + CHECK(json::from_msgpack(std::vector({0xcd, 0x00}), true, false).is_discarded()); + CHECK(json::from_msgpack(std::vector({0xce}), true, false).is_discarded()); + CHECK(json::from_msgpack(std::vector({0xce, 0x00}), true, false).is_discarded()); + CHECK(json::from_msgpack(std::vector({0xce, 0x00, 0x00}), true, false).is_discarded()); + CHECK(json::from_msgpack(std::vector({0xce, 0x00, 0x00, 0x00}), 
true, false).is_discarded()); + CHECK(json::from_msgpack(std::vector({0xcf}), true, false).is_discarded()); + CHECK(json::from_msgpack(std::vector({0xcf, 0x00}), true, false).is_discarded()); + CHECK(json::from_msgpack(std::vector({0xcf, 0x00, 0x00}), true, false).is_discarded()); + CHECK(json::from_msgpack(std::vector({0xcf, 0x00, 0x00, 0x00}), true, false).is_discarded()); + CHECK(json::from_msgpack(std::vector({0xcf, 0x00, 0x00, 0x00, 0x00}), true, false).is_discarded()); + CHECK(json::from_msgpack(std::vector({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00}), true, false).is_discarded()); + CHECK(json::from_msgpack(std::vector({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}), true, false).is_discarded()); + CHECK(json::from_msgpack(std::vector({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}), true, false).is_discarded()); + CHECK(json::from_msgpack(std::vector({0xa5, 0x68, 0x65}), true, false).is_discarded()); + CHECK(json::from_msgpack(std::vector({0x92, 0x01}), true, false).is_discarded()); } SECTION("unsupported bytes") @@ -1078,9 +1142,12 @@ TEST_CASE("MessagePack") CHECK_THROWS_AS(json::from_msgpack(std::vector({0xc1})), json::parse_error&); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xc1})), "[json.exception.parse_error.112] parse error at 1: error reading MessagePack; last byte: 0xC1"); + CHECK(json::from_msgpack(std::vector({0xc1}), true, false).is_discarded()); + CHECK_THROWS_AS(json::from_msgpack(std::vector({0xc6})), json::parse_error&); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xc6})), "[json.exception.parse_error.112] parse error at 1: error reading MessagePack; last byte: 0xC6"); + CHECK(json::from_msgpack(std::vector({0xc6}), true, false).is_discarded()); } SECTION("all unsupported bytes") @@ -1098,6 +1165,7 @@ TEST_CASE("MessagePack") }) { CHECK_THROWS_AS(json::from_msgpack(std::vector({static_cast(byte)})), json::parse_error&); + CHECK(json::from_msgpack(std::vector({static_cast(byte)}), true, false).is_discarded()); } } } @@ -1107,6 +1175,7 @@
TEST_CASE("MessagePack") CHECK_THROWS_AS(json::from_msgpack(std::vector({0x81, 0xff, 0x01})), json::parse_error&); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0x81, 0xff, 0x01})), "[json.exception.parse_error.113] parse error at 2: expected a MessagePack string; last byte: 0xFF"); + CHECK(json::from_msgpack(std::vector({0x81, 0xff, 0x01}), true, false).is_discarded()); } SECTION("strict mode") @@ -1123,6 +1192,7 @@ TEST_CASE("MessagePack") CHECK_THROWS_AS(json::from_msgpack(vec), json::parse_error&); CHECK_THROWS_WITH(json::from_msgpack(vec), "[json.exception.parse_error.110] parse error at 2: expected end of input"); + CHECK(json::from_msgpack(vec, true, false).is_discarded()); } } } diff --git a/test/src/unit-ubjson.cpp b/test/src/unit-ubjson.cpp index 1d7c1046..11774268 100644 --- a/test/src/unit-ubjson.cpp +++ b/test/src/unit-ubjson.cpp @@ -54,6 +54,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("boolean") @@ -67,6 +68,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("false") @@ -78,6 +80,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -140,6 +143,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -186,6 +190,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -219,6 +224,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -239,6 +245,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("-128..-1 (int8)") @@ -269,6 +276,7 @@ TEST_CASE("UBJSON") 
// roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -301,6 +309,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -333,6 +342,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -367,6 +377,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -409,6 +420,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -457,6 +469,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } } @@ -492,6 +505,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -524,6 +538,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -557,6 +572,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -598,6 +614,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -645,6 +662,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } } @@ -665,6 +683,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); CHECK(json::from_ubjson(result) == v); + CHECK(json::from_ubjson(result, true, false) == j); } } } @@ -703,6 +722,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -735,6 +755,7 @@ TEST_CASE("UBJSON") // 
roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -768,6 +789,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -803,6 +825,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } } @@ -820,6 +843,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("size=true type=false") @@ -831,6 +855,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("size=true type=true") @@ -842,6 +867,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -856,6 +882,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("size=true type=false") @@ -867,6 +894,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("size=true type=true") @@ -878,6 +906,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -892,6 +921,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("size=true type=false") @@ -903,6 +933,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("size=true type=true") @@ -914,6 +945,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -928,6 +960,7 @@ TEST_CASE("UBJSON") // roundtrip 
CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("size=true type=false") @@ -939,6 +972,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("size=true type=true") @@ -950,6 +984,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -966,6 +1001,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("size=true type=false") @@ -982,6 +1018,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("size=true type=true") @@ -993,6 +1030,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -1009,6 +1047,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("size=true type=false") @@ -1027,6 +1066,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("size=true type=true") @@ -1038,6 +1078,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } } @@ -1055,6 +1096,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("size=true type=false") @@ -1066,6 +1108,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("size=true type=true") @@ -1077,6 +1120,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } 
@@ -1091,6 +1135,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("size=true type=false") @@ -1102,6 +1147,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("size=true type=true") @@ -1113,6 +1159,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } @@ -1130,6 +1177,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("size=true type=false") @@ -1144,6 +1192,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } SECTION("size=true type=true") @@ -1158,6 +1207,7 @@ TEST_CASE("UBJSON") // roundtrip CHECK(json::from_ubjson(result) == j); + CHECK(json::from_ubjson(result, true, false) == j); } } } @@ -1239,6 +1289,7 @@ TEST_CASE("UBJSON") SECTION("optimized version (length only)") { // create vector with two elements of the same type + std::vector v_TU = {'[', '#', 'U', 2, 'T', 'T'}; std::vector v_T = {'[', '#', 'i', 2, 'T', 'T'}; std::vector v_F = {'[', '#', 'i', 2, 'F', 'F'}; std::vector v_Z = {'[', '#', 'i', 2, 'Z', 'Z'}; @@ -1252,6 +1303,7 @@ TEST_CASE("UBJSON") std::vector v_C = {'[', '#', 'i', 2, 'C', 'a', 'C', 'a'}; // check if vector is parsed correctly + CHECK(json::from_ubjson(v_TU) == json({true, true})); CHECK(json::from_ubjson(v_T) == json({true, true})); CHECK(json::from_ubjson(v_F) == json({false, false})); CHECK(json::from_ubjson(v_Z) == json({nullptr, nullptr})); @@ -1378,6 +1430,126 @@ TEST_CASE("UBJSON") CHECK_THROWS_WITH(json::from_ubjson(v), "[json.exception.parse_error.112] parse error at 4: expected '#' after UBJSON type information; last byte: 0x02"); } } + + SECTION("strings") + { + std::vector vS = {'S'}; + 
CHECK_THROWS_AS(json::from_ubjson(vS), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(vS), "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); + CHECK(json::from_ubjson(vS, true, false).is_discarded()); + + std::vector v = {'S', 'i', '2', 'a'}; + CHECK_THROWS_AS(json::from_ubjson(v), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(v), "[json.exception.parse_error.110] parse error at 5: unexpected end of input"); + CHECK(json::from_ubjson(v, true, false).is_discarded()); + + std::vector vC = {'C'}; + CHECK_THROWS_AS(json::from_ubjson(vC), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(vC), "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); + CHECK(json::from_ubjson(vC, true, false).is_discarded()); + } + + SECTION("sizes") + { + std::vector vU = {'[', '#', 'U'}; + CHECK_THROWS_AS(json::from_ubjson(vU), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(vU), "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); + CHECK(json::from_ubjson(vU, true, false).is_discarded()); + + std::vector vi = {'[', '#', 'i'}; + CHECK_THROWS_AS(json::from_ubjson(vi), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(vi), "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); + CHECK(json::from_ubjson(vi, true, false).is_discarded()); + + std::vector vI = {'[', '#', 'I'}; + CHECK_THROWS_AS(json::from_ubjson(vI), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(vI), "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); + CHECK(json::from_ubjson(vI, true, false).is_discarded()); + + std::vector vl = {'[', '#', 'l'}; + CHECK_THROWS_AS(json::from_ubjson(vl), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(vl), "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); + CHECK(json::from_ubjson(vl, true, false).is_discarded()); + + std::vector vL = {'[', '#', 'L'}; + 
CHECK_THROWS_AS(json::from_ubjson(vL), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(vL), "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); + CHECK(json::from_ubjson(vL, true, false).is_discarded()); + + std::vector v0 = {'[', '#', 'T', ']'}; + CHECK_THROWS_AS(json::from_ubjson(v0), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(v0), "[json.exception.parse_error.113] parse error at 3: byte after '#' must denote a number type; last byte: 0x54"); + CHECK(json::from_ubjson(v0, true, false).is_discarded()); + } + + SECTION("types") + { + std::vector v0 = {'[', '$'}; + CHECK_THROWS_AS(json::from_ubjson(v0), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(v0), "[json.exception.parse_error.110] parse error at 3: unexpected end of input"); + CHECK(json::from_ubjson(v0, true, false).is_discarded()); + + std::vector vi = {'[', '$', '#'}; + CHECK_THROWS_AS(json::from_ubjson(vi), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(vi), "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); + CHECK(json::from_ubjson(vi, true, false).is_discarded()); + + std::vector vT = {'[', '$', 'T'}; + CHECK_THROWS_AS(json::from_ubjson(vT), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(vT), "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); + CHECK(json::from_ubjson(vT, true, false).is_discarded()); + } + + SECTION("arrays") + { + std::vector vST = {'[', '$', 'i', '#', 'i', 2, 1}; + CHECK_THROWS_AS(json::from_ubjson(vST), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(vST), "[json.exception.parse_error.110] parse error at 8: unexpected end of input"); + CHECK(json::from_ubjson(vST, true, false).is_discarded()); + + std::vector vS = {'[', '#', 'i', 2, 'i', 1}; + CHECK_THROWS_AS(json::from_ubjson(vS), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(vS), "[json.exception.parse_error.110] parse error at 7: unexpected end of input"); + 
CHECK(json::from_ubjson(vS, true, false).is_discarded()); + + std::vector v = {'[', 'i', 2, 'i', 1}; + CHECK_THROWS_AS(json::from_ubjson(v), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(v), "[json.exception.parse_error.110] parse error at 6: unexpected end of input"); + CHECK(json::from_ubjson(v, true, false).is_discarded()); + } + + SECTION("objects") + { + std::vector vST = {'{', '$', 'i', '#', 'i', 2, 'i', 1, 'a', 1}; + CHECK_THROWS_AS(json::from_ubjson(vST), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(vST), "[json.exception.parse_error.110] parse error at 11: unexpected end of input"); + CHECK(json::from_ubjson(vST, true, false).is_discarded()); + + std::vector vT = {'{', '$', 'i', 'i', 1, 'a', 1}; + CHECK_THROWS_AS(json::from_ubjson(vT), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(vT), "[json.exception.parse_error.112] parse error at 4: expected '#' after UBJSON type information; last byte: 0x69"); + CHECK(json::from_ubjson(vT, true, false).is_discarded()); + + std::vector vS = {'{', '#', 'i', 2, 'i', 1, 'a', 'i', 1}; + CHECK_THROWS_AS(json::from_ubjson(vS), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(vS), "[json.exception.parse_error.110] parse error at 10: unexpected end of input"); + CHECK(json::from_ubjson(vS, true, false).is_discarded()); + + std::vector v = {'{', 'i', 1, 'a', 'i', 1}; + CHECK_THROWS_AS(json::from_ubjson(v), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(v), "[json.exception.parse_error.110] parse error at 7: unexpected end of input"); + CHECK(json::from_ubjson(v, true, false).is_discarded()); + + std::vector v2 = {'{', 'i', 1, 'a', 'i', 1, 'i'}; + CHECK_THROWS_AS(json::from_ubjson(v2), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(v2), "[json.exception.parse_error.110] parse error at 8: unexpected end of input"); + CHECK(json::from_ubjson(v2, true, false).is_discarded()); + + std::vector v3 = {'{', 'i', 1, 'a'}; + CHECK_THROWS_AS(json::from_ubjson(v3), 
json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(v3), "[json.exception.parse_error.110] parse error at 5: unexpected end of input"); + CHECK(json::from_ubjson(v3, true, false).is_discarded()); + } } SECTION("writing optimized values") From 25f56ff2076aa7fd3e275e92db4eb15a5f111894 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 20 Mar 2018 19:22:18 +0100 Subject: [PATCH 29/43] :memo: updated documentation --- include/nlohmann/json.hpp | 37 ++++++++++++++++++-------------- single_include/nlohmann/json.hpp | 37 ++++++++++++++++++-------------- 2 files changed, 42 insertions(+), 32 deletions(-) diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index 393fa2ea..ff2f7f34 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -6629,14 +6629,15 @@ class basic_json @sa http://cbor.io @sa @ref to_cbor(const basic_json&) for the analogous serialization - @sa @ref from_msgpack(detail::input_adapter, const bool) for the + @sa @ref from_msgpack(detail::input_adapter, const bool, const bool) for the related MessagePack format - @sa @ref from_ubjson(detail::input_adapter, const bool) for the related - UBJSON format + @sa @ref from_ubjson(detail::input_adapter, const bool, const bool) for the + related UBJSON format @since version 2.0.9; parameter @a start_index since 2.1.1; changed to consume input adapters, removed start_index parameter, and added - @a strict parameter since 3.0.0 + @a strict parameter since 3.0.0; added @a allow_exceptions parameter + since 3.2.0 */ static basic_json from_cbor(detail::input_adapter i, const bool strict = true, @@ -6649,7 +6650,7 @@ class basic_json } /*! 
- @copydoc from_cbor(detail::input_adapter, const bool) + @copydoc from_cbor(detail::input_adapter, const bool, const bool) */ template::value, int> = 0> @@ -6731,14 +6732,15 @@ class basic_json @sa http://msgpack.org @sa @ref to_msgpack(const basic_json&) for the analogous serialization - @sa @ref from_cbor(detail::input_adapter, const bool) for the related CBOR - format - @sa @ref from_ubjson(detail::input_adapter, const bool) for the related - UBJSON format + @sa @ref from_cbor(detail::input_adapter, const bool, const bool) for the + related CBOR format + @sa @ref from_ubjson(detail::input_adapter, const bool, const bool) for + the related UBJSON format @since version 2.0.9; parameter @a start_index since 2.1.1; changed to consume input adapters, removed start_index parameter, and added - @a strict parameter since 3.0.0 + @a strict parameter since 3.0.0; added @a allow_exceptions parameter + since 3.2.0 */ static basic_json from_msgpack(detail::input_adapter i, const bool strict = true, @@ -6751,7 +6753,7 @@ class basic_json } /*! - @copydoc from_msgpack(detail::input_adapter, const bool) + @copydoc from_msgpack(detail::input_adapter, const bool, const bool) */ template::value, int> = 0> @@ -6815,12 +6817,12 @@ class basic_json @sa http://ubjson.org @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the analogous serialization - @sa @ref from_cbor(detail::input_adapter, const bool) for the related CBOR - format - @sa @ref from_msgpack(detail::input_adapter, const bool) for the related - MessagePack format + @sa @ref from_cbor(detail::input_adapter, const bool, const bool) for the + related CBOR format + @sa @ref from_msgpack(detail::input_adapter, const bool, const bool) for + the related MessagePack format - @since version 3.1.0 + @since version 3.1.0; added @a allow_exceptions parameter since 3.2.0 */ static basic_json from_ubjson(detail::input_adapter i, const bool strict = true, @@ -6832,6 +6834,9 @@ class basic_json return res ? 
result : basic_json(value_t::discarded); } + /*! + @copydoc from_ubjson(detail::input_adapter, const bool, const bool) + */ template::value, int> = 0> static basic_json from_ubjson(A1 && a1, A2 && a2, diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 57eba455..67af8de1 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -17378,14 +17378,15 @@ class basic_json @sa http://cbor.io @sa @ref to_cbor(const basic_json&) for the analogous serialization - @sa @ref from_msgpack(detail::input_adapter, const bool) for the + @sa @ref from_msgpack(detail::input_adapter, const bool, const bool) for the related MessagePack format - @sa @ref from_ubjson(detail::input_adapter, const bool) for the related - UBJSON format + @sa @ref from_ubjson(detail::input_adapter, const bool, const bool) for the + related UBJSON format @since version 2.0.9; parameter @a start_index since 2.1.1; changed to consume input adapters, removed start_index parameter, and added - @a strict parameter since 3.0.0 + @a strict parameter since 3.0.0; added @a allow_exceptions parameter + since 3.2.0 */ static basic_json from_cbor(detail::input_adapter i, const bool strict = true, @@ -17398,7 +17399,7 @@ class basic_json } /*! 
- @copydoc from_cbor(detail::input_adapter, const bool) + @copydoc from_cbor(detail::input_adapter, const bool, const bool) */ template::value, int> = 0> @@ -17480,14 +17481,15 @@ class basic_json @sa http://msgpack.org @sa @ref to_msgpack(const basic_json&) for the analogous serialization - @sa @ref from_cbor(detail::input_adapter, const bool) for the related CBOR - format - @sa @ref from_ubjson(detail::input_adapter, const bool) for the related - UBJSON format + @sa @ref from_cbor(detail::input_adapter, const bool, const bool) for the + related CBOR format + @sa @ref from_ubjson(detail::input_adapter, const bool, const bool) for + the related UBJSON format @since version 2.0.9; parameter @a start_index since 2.1.1; changed to consume input adapters, removed start_index parameter, and added - @a strict parameter since 3.0.0 + @a strict parameter since 3.0.0; added @a allow_exceptions parameter + since 3.2.0 */ static basic_json from_msgpack(detail::input_adapter i, const bool strict = true, @@ -17500,7 +17502,7 @@ class basic_json } /*! - @copydoc from_msgpack(detail::input_adapter, const bool) + @copydoc from_msgpack(detail::input_adapter, const bool, const bool) */ template::value, int> = 0> @@ -17564,12 +17566,12 @@ class basic_json @sa http://ubjson.org @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the analogous serialization - @sa @ref from_cbor(detail::input_adapter, const bool) for the related CBOR - format - @sa @ref from_msgpack(detail::input_adapter, const bool) for the related - MessagePack format + @sa @ref from_cbor(detail::input_adapter, const bool, const bool) for the + related CBOR format + @sa @ref from_msgpack(detail::input_adapter, const bool, const bool) for + the related MessagePack format - @since version 3.1.0 + @since version 3.1.0; added @a allow_exceptions parameter since 3.2.0 */ static basic_json from_ubjson(detail::input_adapter i, const bool strict = true, @@ -17581,6 +17583,9 @@ class basic_json return res ? 
result : basic_json(value_t::discarded); } + /*! + @copydoc from_ubjson(detail::input_adapter, const bool, const bool) + */ template::value, int> = 0> static basic_json from_ubjson(A1 && a1, A2 && a2, From 1e38ffc01413f7ac46e1362c6fb55e0de5ab5049 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 20 Mar 2018 20:04:11 +0100 Subject: [PATCH 30/43] :white_check_mark: more tests --- test/src/unit-cbor.cpp | 9 ++++++++- test/src/unit-msgpack.cpp | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/test/src/unit-cbor.cpp b/test/src/unit-cbor.cpp index b0efe6bc..ba07026a 100644 --- a/test/src/unit-cbor.cpp +++ b/test/src/unit-cbor.cpp @@ -1295,7 +1295,8 @@ TEST_CASE("CBOR") CHECK_THROWS_AS(json::from_cbor(std::vector({0x82, 0x01})), json::parse_error&); CHECK_THROWS_AS(json::from_cbor(std::vector({0x9F, 0x01})), json::parse_error&); CHECK_THROWS_AS(json::from_cbor(std::vector({0xBF, 0x61, 0x61, 0xF5})), json::parse_error&); - + CHECK_THROWS_AS(json::from_cbor(std::vector({0xA1, 0x61, 0X61})), json::parse_error&); + CHECK_THROWS_AS(json::from_cbor(std::vector({0xBF, 0x61, 0X61})), json::parse_error&); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x18})), "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); @@ -1341,6 +1342,10 @@ TEST_CASE("CBOR") "[json.exception.parse_error.110] parse error at 3: unexpected end of input"); CHECK_THROWS_WITH(json::from_cbor(std::vector({0xBF, 0x61, 0x61, 0xF5})), "[json.exception.parse_error.110] parse error at 5: unexpected end of input"); + CHECK_THROWS_WITH(json::from_cbor(std::vector({0xA1, 0x61, 0x61})), + "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); + CHECK_THROWS_WITH(json::from_cbor(std::vector({0xBF, 0x61, 0x61})), + "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); CHECK(json::from_cbor(std::vector({0x18}), true, false).is_discarded()); CHECK(json::from_cbor(std::vector({0x19}), true, false).is_discarded()); @@ 
-1364,6 +1369,8 @@ TEST_CASE("CBOR") CHECK(json::from_cbor(std::vector({0x82, 0x01}), true, false).is_discarded()); CHECK(json::from_cbor(std::vector({0x9F, 0x01}), true, false).is_discarded()); CHECK(json::from_cbor(std::vector({0xBF, 0x61, 0x61, 0xF5}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0xA1, 0x61, 0x61}), true, false).is_discarded()); + CHECK(json::from_cbor(std::vector({0xBF, 0x61, 0x61}), true, false).is_discarded()); } SECTION("unsupported bytes") diff --git a/test/src/unit-msgpack.cpp b/test/src/unit-msgpack.cpp index 9cb3d18b..e1c209d9 100644 --- a/test/src/unit-msgpack.cpp +++ b/test/src/unit-msgpack.cpp @@ -1077,6 +1077,7 @@ TEST_CASE("MessagePack") CHECK_THROWS_AS(json::from_msgpack(std::vector({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})), json::parse_error&); CHECK_THROWS_AS(json::from_msgpack(std::vector({0xa5, 0x68, 0x65})), json::parse_error&); CHECK_THROWS_AS(json::from_msgpack(std::vector({0x92, 0x01})), json::parse_error&); + CHECK_THROWS_AS(json::from_msgpack(std::vector({0x81, 0xa1, 0x61})), json::parse_error&); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0x87})), "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); @@ -1114,6 +1115,8 @@ TEST_CASE("MessagePack") "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0x92, 0x01})), "[json.exception.parse_error.110] parse error at 3: unexpected end of input"); + CHECK_THROWS_WITH(json::from_msgpack(std::vector({0x81, 0xa1, 0x61})), + "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); CHECK(json::from_msgpack(std::vector({0x87}), true, false).is_discarded()); CHECK(json::from_msgpack(std::vector({0xcc}), true, false).is_discarded()); @@ -1133,6 +1136,7 @@ TEST_CASE("MessagePack") CHECK(json::from_msgpack(std::vector({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}), true, false).is_discarded()); 
CHECK(json::from_msgpack(std::vector({0xa5, 0x68, 0x65}), true, false).is_discarded()); CHECK(json::from_msgpack(std::vector({0x92, 0x01}), true, false).is_discarded()); + CHECK(json::from_msgpack(std::vector({0x81, 0xA1, 0x61}), true, false).is_discarded()); } SECTION("unsupported bytes") From 9e1abb48423c87ada6eb39d4cf9451388a31d55d Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 20 Mar 2018 22:39:08 +0100 Subject: [PATCH 31/43] :white_check_mark: improved coverage --- .../nlohmann/detail/input/binary_reader.hpp | 18 +-- .../nlohmann/detail/input/input_adapters.hpp | 3 + include/nlohmann/detail/input/json_sax.hpp | 26 +--- include/nlohmann/detail/input/lexer.hpp | 2 +- include/nlohmann/detail/input/parser.hpp | 27 ++-- include/nlohmann/json.hpp | 59 ++++---- single_include/nlohmann/json.hpp | 135 +++++++----------- test/src/unit-cbor.cpp | 94 ++++++++++++ test/src/unit-class_parser.cpp | 13 +- test/src/unit-deserialization.cpp | 6 - test/src/unit-msgpack.cpp | 94 ++++++++++++ test/src/unit-ubjson.cpp | 115 +++++++++++++++ 12 files changed, 412 insertions(+), 180 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index d3c09bc9..0feb6dd4 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -41,9 +41,6 @@ class binary_reader using json_sax_t = json_sax; public: - /// the supported binary input formats - enum class binary_format_t { cbor, msgpack, ubjson }; - /*! 
@brief create a binary reader @@ -61,30 +58,35 @@ class binary_reader @return */ - bool sax_parse(const binary_format_t format, json_sax_t* sax_, const bool strict) + bool sax_parse(const input_format_t format, + json_sax_t* sax_, + const bool strict = true) { sax = sax_; bool result; switch (format) { - case binary_format_t::cbor: + case input_format_t::cbor: result = parse_cbor_internal(); break; - case binary_format_t::msgpack: + case input_format_t::msgpack: result = parse_msgpack_internal(); break; - case binary_format_t::ubjson: + case input_format_t::ubjson: result = parse_ubjson_internal(); break; + + default: + assert(false); // LCOV_EXCL_LINE } // strict mode: next byte must be EOF if (result and strict) { - if (format == binary_format_t::ubjson) + if (format == input_format_t::ubjson) { get_ignore_noop(); } diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index ef66948d..92987e97 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -20,6 +20,9 @@ namespace nlohmann { namespace detail { +/// the supported input formats +enum class input_format_t { json, cbor, msgpack, ubjson }; + //////////////////// // input adapters // //////////////////// diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index 5763e652..98479eb9 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -105,21 +105,12 @@ struct json_sax */ virtual bool end_array() = 0; - /*! - @brief a binary value was read - @param[in] val byte vector - @return whether parsing should proceed - @note examples are CBOR type 2 strings, MessagePack bin, and maybe UBJSON - array - */ - virtual bool binary(const std::vector& val) = 0; - /*! 
@brief a parse error occurred @param[in] position the position in the input where the error occurs @param[in] last_token the last read token @param[in] error_msg a detailed error message - @return whether parsing should proceed + @return whether parsing should proceed (must return false) */ virtual bool parse_error(std::size_t position, const std::string& last_token, @@ -225,11 +216,6 @@ class json_sax_dom_parser : public json_sax return true; } - bool binary(const std::vector&) override - { - return true; - } - bool parse_error(std::size_t, const std::string&, const detail::exception& ex) override { @@ -430,11 +416,6 @@ class json_sax_dom_callback_parser : public json_sax return true; } - bool binary(const std::vector&) override - { - return true; - } - bool parse_error(std::size_t, const std::string&, const detail::exception& ex) override { @@ -580,11 +561,6 @@ class json_sax_acceptor : public json_sax return true; } - bool binary(const std::vector&) override - { - return true; - } - bool parse_error(std::size_t, const std::string&, const detail::exception&) override { return false; diff --git a/include/nlohmann/detail/input/lexer.hpp b/include/nlohmann/detail/input/lexer.hpp index 98cc1b69..20c1b3fe 100644 --- a/include/nlohmann/detail/input/lexer.hpp +++ b/include/nlohmann/detail/input/lexer.hpp @@ -99,7 +99,7 @@ class lexer } } - explicit lexer(detail::input_adapter_t adapter) + explicit lexer(detail::input_adapter_t&& adapter) : ia(std::move(adapter)), decimal_point_char(get_decimal_point()) {} // delete because of pointer members diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index 5d389dc6..a38101c9 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -60,10 +60,10 @@ class parser std::function; /// a parser reading from an input adapter - explicit parser(detail::input_adapter_t adapter, + explicit parser(detail::input_adapter_t&& adapter, const 
parser_callback_t cb = nullptr, const bool allow_exceptions_ = true) - : callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_) + : callback(cb), m_lexer(std::move(adapter)), allow_exceptions(allow_exceptions_) { // read first token get_token(); @@ -160,19 +160,22 @@ class parser bool accept(const bool strict = true) { json_sax_acceptor sax_acceptor; - - if (not sax_parse_internal(&sax_acceptor)) - { - return false; - } - - // strict => last token must be EOF - return not strict or (get_token() == token_type::end_of_input); + return sax_parse(&sax_acceptor, strict); } - bool sax_parse(json_sax_t* sax) + bool sax_parse(json_sax_t* sax, const bool strict = true) { - return sax_parse_internal(sax); + const bool result = sax_parse_internal(sax); + + // strict mode: next byte must be EOF + if (result and strict and (get_token() != token_type::end_of_input)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input))); + } + + return result; } private: diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index ff2f7f34..66d0fb37 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -209,6 +209,8 @@ class basic_json /// helper type for initializer lists of basic_json values using initializer_list_t = std::initializer_list>; + using input_format_t = detail::input_format_t; + //////////////// // exceptions // //////////////// @@ -5996,7 +5998,7 @@ class basic_json @since version 2.0.3 (contiguous containers) */ - static basic_json parse(detail::input_adapter i, + static basic_json parse(detail::input_adapter&& i, const parser_callback_t cb = nullptr, const bool allow_exceptions = true) { @@ -6005,36 +6007,23 @@ class basic_json return result; } - /*! 
- @copydoc basic_json parse(detail::input_adapter, const parser_callback_t) - */ - static basic_json parse(detail::input_adapter& i, - const parser_callback_t cb = nullptr, - const bool allow_exceptions = true) - { - basic_json result; - parser(i, cb, allow_exceptions).parse(true, result); - return result; - } - - static bool accept(detail::input_adapter i) + static bool accept(detail::input_adapter&& i) { return parser(i).accept(true); } - static bool accept(detail::input_adapter& i) + static bool sax_parse(detail::input_adapter&& i, json_sax_t* sax, + input_format_t format = input_format_t::json, + const bool strict = true) { - return parser(i).accept(true); - } - - static bool sax_parse(detail::input_adapter i, json_sax_t* sax) - { - return parser(i).sax_parse(sax); - } - - static bool sax_parse(detail::input_adapter& i, json_sax_t* sax) - { - return parser(i).sax_parse(sax); + assert(sax); + switch (format) + { + case input_format_t::json: + return parser(std::move(i)).sax_parse(sax, strict); + default: + return binary_reader(std::move(i)).sax_parse(format, sax, strict); + } } /*! @@ -6639,13 +6628,13 @@ class basic_json @a strict parameter since 3.0.0; added @allow_exceptions parameter since 3.2.0 */ - static basic_json from_cbor(detail::input_adapter i, + static basic_json from_cbor(detail::input_adapter&& i, const bool strict = true, const bool allow_exceptions = true) { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(detail::input_adapter(i)).sax_parse(binary_reader::binary_format_t::cbor, &sdp, strict); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(input_format_t::cbor, &sdp, strict); return res ? 
result : basic_json(value_t::discarded); } @@ -6660,7 +6649,7 @@ class basic_json { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(binary_reader::binary_format_t::cbor, &sdp, strict); + const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(input_format_t::cbor, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -6742,13 +6731,13 @@ class basic_json @a strict parameter since 3.0.0; added @allow_exceptions parameter since 3.2.0 */ - static basic_json from_msgpack(detail::input_adapter i, + static basic_json from_msgpack(detail::input_adapter&& i, const bool strict = true, const bool allow_exceptions = true) { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(detail::input_adapter(i)).sax_parse(binary_reader::binary_format_t::msgpack, &sdp, strict); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(input_format_t::msgpack, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -6763,7 +6752,7 @@ class basic_json { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(binary_reader::binary_format_t::msgpack, &sdp, strict); + const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(input_format_t::msgpack, &sdp, strict); return res ? 
result : basic_json(value_t::discarded); } @@ -6824,13 +6813,13 @@ class basic_json @since version 3.1.0; added @allow_exceptions parameter since 3.2.0 */ - static basic_json from_ubjson(detail::input_adapter i, + static basic_json from_ubjson(detail::input_adapter&& i, const bool strict = true, const bool allow_exceptions = true) { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(detail::input_adapter(i)).sax_parse(binary_reader::binary_format_t::ubjson, &sdp, strict); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(input_format_t::ubjson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -6845,7 +6834,7 @@ class basic_json { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(binary_reader::binary_format_t::ubjson, &sdp, strict); + const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(input_format_t::ubjson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 67af8de1..55e1e01b 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -1593,6 +1593,9 @@ namespace nlohmann { namespace detail { +/// the supported input formats +enum class input_format_t { json, cbor, msgpack, ubjson }; + //////////////////// // input adapters // //////////////////// @@ -1938,7 +1941,7 @@ class lexer } } - explicit lexer(detail::input_adapter_t adapter) + explicit lexer(detail::input_adapter_t&& adapter) : ia(std::move(adapter)), decimal_point_char(get_decimal_point()) {} // delete because of pointer members @@ -3242,21 +3245,12 @@ struct json_sax */ virtual bool end_array() = 0; - /*! 
- @brief a binary value was read - @param[in] val byte vector - @return whether parsing should proceed - @note examples are CBOR type 2 strings, MessagePack bin, and maybe UBJSON - array - */ - virtual bool binary(const std::vector& val) = 0; - /*! @brief a parse error occurred @param[in] position the position in the input where the error occurs @param[in] last_token the last read token @param[in] error_msg a detailed error message - @return whether parsing should proceed + @return whether parsing should proceed (must return false) */ virtual bool parse_error(std::size_t position, const std::string& last_token, @@ -3362,11 +3356,6 @@ class json_sax_dom_parser : public json_sax return true; } - bool binary(const std::vector&) override - { - return true; - } - bool parse_error(std::size_t, const std::string&, const detail::exception& ex) override { @@ -3567,11 +3556,6 @@ class json_sax_dom_callback_parser : public json_sax return true; } - bool binary(const std::vector&) override - { - return true; - } - bool parse_error(std::size_t, const std::string&, const detail::exception& ex) override { @@ -3717,11 +3701,6 @@ class json_sax_acceptor : public json_sax return true; } - bool binary(const std::vector&) override - { - return true; - } - bool parse_error(std::size_t, const std::string&, const detail::exception&) override { return false; @@ -3783,10 +3762,10 @@ class parser std::function; /// a parser reading from an input adapter - explicit parser(detail::input_adapter_t adapter, + explicit parser(detail::input_adapter_t&& adapter, const parser_callback_t cb = nullptr, const bool allow_exceptions_ = true) - : callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_) + : callback(cb), m_lexer(std::move(adapter)), allow_exceptions(allow_exceptions_) { // read first token get_token(); @@ -3883,19 +3862,22 @@ class parser bool accept(const bool strict = true) { json_sax_acceptor sax_acceptor; - - if (not sax_parse_internal(&sax_acceptor)) - { - return false; - 
} - - // strict => last token must be EOF - return not strict or (get_token() == token_type::end_of_input); + return sax_parse(&sax_acceptor, strict); } - bool sax_parse(json_sax_t* sax) + bool sax_parse(json_sax_t* sax, const bool strict = true) { - return sax_parse_internal(sax); + const bool result = sax_parse_internal(sax); + + // strict mode: next byte must be EOF + if (result and strict and (get_token() != token_type::end_of_input)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input))); + } + + return result; } private: @@ -5699,9 +5681,6 @@ class binary_reader using json_sax_t = json_sax; public: - /// the supported binary input formats - enum class binary_format_t { cbor, msgpack, ubjson }; - /*! @brief create a binary reader @@ -5719,30 +5698,35 @@ class binary_reader @return */ - bool sax_parse(const binary_format_t format, json_sax_t* sax_, const bool strict) + bool sax_parse(const input_format_t format, + json_sax_t* sax_, + const bool strict = true) { sax = sax_; bool result; switch (format) { - case binary_format_t::cbor: + case input_format_t::cbor: result = parse_cbor_internal(); break; - case binary_format_t::msgpack: + case input_format_t::msgpack: result = parse_msgpack_internal(); break; - case binary_format_t::ubjson: + case input_format_t::ubjson: result = parse_ubjson_internal(); break; + + default: + assert(false); // LCOV_EXCL_LINE } // strict mode: next byte must be EOF if (result and strict) { - if (format == binary_format_t::ubjson) + if (format == input_format_t::ubjson) { get_ignore_noop(); } @@ -10958,6 +10942,8 @@ class basic_json /// helper type for initializer lists of basic_json values using initializer_list_t = std::initializer_list>; + using input_format_t = detail::input_format_t; + //////////////// // exceptions // //////////////// @@ -16745,7 +16731,7 @@ class basic_json @since version 2.0.3 
(contiguous containers) */ - static basic_json parse(detail::input_adapter i, + static basic_json parse(detail::input_adapter&& i, const parser_callback_t cb = nullptr, const bool allow_exceptions = true) { @@ -16754,36 +16740,23 @@ class basic_json return result; } - /*! - @copydoc basic_json parse(detail::input_adapter, const parser_callback_t) - */ - static basic_json parse(detail::input_adapter& i, - const parser_callback_t cb = nullptr, - const bool allow_exceptions = true) - { - basic_json result; - parser(i, cb, allow_exceptions).parse(true, result); - return result; - } - - static bool accept(detail::input_adapter i) + static bool accept(detail::input_adapter&& i) { return parser(i).accept(true); } - static bool accept(detail::input_adapter& i) + static bool sax_parse(detail::input_adapter&& i, json_sax_t* sax, + input_format_t format = input_format_t::json, + const bool strict = true) { - return parser(i).accept(true); - } - - static bool sax_parse(detail::input_adapter i, json_sax_t* sax) - { - return parser(i).sax_parse(sax); - } - - static bool sax_parse(detail::input_adapter& i, json_sax_t* sax) - { - return parser(i).sax_parse(sax); + assert(sax); + switch (format) + { + case input_format_t::json: + return parser(std::move(i)).sax_parse(sax, strict); + default: + return binary_reader(std::move(i)).sax_parse(format, sax, strict); + } } /*! @@ -17388,13 +17361,13 @@ class basic_json @a strict parameter since 3.0.0; added @allow_exceptions parameter since 3.2.0 */ - static basic_json from_cbor(detail::input_adapter i, + static basic_json from_cbor(detail::input_adapter&& i, const bool strict = true, const bool allow_exceptions = true) { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(detail::input_adapter(i)).sax_parse(binary_reader::binary_format_t::cbor, &sdp, strict); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(input_format_t::cbor, &sdp, strict); return res ? 
result : basic_json(value_t::discarded); } @@ -17409,7 +17382,7 @@ class basic_json { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(binary_reader::binary_format_t::cbor, &sdp, strict); + const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(input_format_t::cbor, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -17491,13 +17464,13 @@ class basic_json @a strict parameter since 3.0.0; added @allow_exceptions parameter since 3.2.0 */ - static basic_json from_msgpack(detail::input_adapter i, + static basic_json from_msgpack(detail::input_adapter&& i, const bool strict = true, const bool allow_exceptions = true) { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(detail::input_adapter(i)).sax_parse(binary_reader::binary_format_t::msgpack, &sdp, strict); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(input_format_t::msgpack, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -17512,7 +17485,7 @@ class basic_json { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(binary_reader::binary_format_t::msgpack, &sdp, strict); + const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(input_format_t::msgpack, &sdp, strict); return res ? 
result : basic_json(value_t::discarded); } @@ -17573,13 +17546,13 @@ class basic_json @since version 3.1.0; added @allow_exceptions parameter since 3.2.0 */ - static basic_json from_ubjson(detail::input_adapter i, + static basic_json from_ubjson(detail::input_adapter&& i, const bool strict = true, const bool allow_exceptions = true) { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(detail::input_adapter(i)).sax_parse(binary_reader::binary_format_t::ubjson, &sdp, strict); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(input_format_t::ubjson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -17594,7 +17567,7 @@ class basic_json { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(binary_reader::binary_format_t::ubjson, &sdp, strict); + const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(input_format_t::ubjson, &sdp, strict); return res ? 
result : basic_json(value_t::discarded); } diff --git a/test/src/unit-cbor.cpp b/test/src/unit-cbor.cpp index ba07026a..cd8fb0db 100644 --- a/test/src/unit-cbor.cpp +++ b/test/src/unit-cbor.cpp @@ -33,6 +33,76 @@ using nlohmann::json; #include +class SaxCountdown : public nlohmann::json::json_sax_t +{ + public: + explicit SaxCountdown(const int count) : events_left(count) + {} + + bool null() override + { + return events_left-- > 0; + } + + bool boolean(bool) override + { + return events_left-- > 0; + } + + bool number_integer(json::number_integer_t) override + { + return events_left-- > 0; + } + + bool number_unsigned(json::number_unsigned_t) override + { + return events_left-- > 0; + } + + bool number_float(json::number_float_t, const std::string&) override + { + return events_left-- > 0; + } + + bool string(std::string&&) override + { + return events_left-- > 0; + } + + bool start_object(std::size_t) override + { + return events_left-- > 0; + } + + bool key(std::string&&) override + { + return events_left-- > 0; + } + + bool end_object() override + { + return events_left-- > 0; + } + + bool start_array(std::size_t) override + { + return events_left-- > 0; + } + + bool end_array() override + { + return events_left-- > 0; + } + + bool parse_error(std::size_t, const std::string&, const json::exception&) override + { + return false; + } + + private: + int events_left = 0; +}; + TEST_CASE("CBOR") { SECTION("individual values") @@ -1467,6 +1537,30 @@ TEST_CASE("CBOR") } } } + + SECTION("SAX aborts") + { + SECTION("start_array(len)") + { + std::vector v = {0x83, 0x01, 0x02, 0x03}; + SaxCountdown scp(0); + CHECK(not json::sax_parse(v, &scp, json::input_format_t::cbor)); + } + + SECTION("start_object(len)") + { + std::vector v = {0xA1, 0x63, 0x66, 0x6F, 0x6F, 0xF4}; + SaxCountdown scp(0); + CHECK(not json::sax_parse(v, &scp, json::input_format_t::cbor)); + } + + SECTION("key()") + { + std::vector v = {0xA1, 0x63, 0x66, 0x6F, 0x6F, 0xF4}; + SaxCountdown scp(1); + 
CHECK(not json::sax_parse(v, &scp, json::input_format_t::cbor)); + } + } } // use this testcase outside [hide] to run it with Valgrind diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index c425207a..1e100568 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -117,12 +117,6 @@ class SaxEventLogger : public nlohmann::json::json_sax_t return true; } - bool binary(const std::vector&) override - { - events.push_back("binary()"); - return true; - } - bool parse_error(std::size_t position, const std::string&, const json::exception&) override { errored = true; @@ -195,11 +189,6 @@ class SaxCountdown : public nlohmann::json::json_sax_t return events_left-- > 0; } - bool binary(const std::vector&) override - { - return events_left-- > 0; - } - bool parse_error(std::size_t, const std::string&, const json::exception&) override { return false; @@ -248,7 +237,7 @@ bool accept_helper(const std::string& s) // 4. parse with SAX (compare with relaxed accept result) SaxEventLogger el; - CHECK_NOTHROW(json::sax_parse(s, &el)); + CHECK_NOTHROW(json::sax_parse(s, &el, json::input_format_t::json, false)); CHECK(json::parser(nlohmann::detail::input_adapter(s)).accept(false) == not el.errored); // 5. 
parse with simple callback diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index 4243e839..5aa3a8b6 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -116,12 +116,6 @@ struct SaxEventLogger : public nlohmann::json::json_sax_t return true; } - bool binary(const std::vector&) override - { - events.push_back("binary()"); - return true; - } - bool parse_error(std::size_t position, const std::string&, const json::exception&) override { events.push_back("parse_error(" + std::to_string(position) + ")"); diff --git a/test/src/unit-msgpack.cpp b/test/src/unit-msgpack.cpp index e1c209d9..6974c530 100644 --- a/test/src/unit-msgpack.cpp +++ b/test/src/unit-msgpack.cpp @@ -33,6 +33,76 @@ using nlohmann::json; #include +class SaxCountdown : public nlohmann::json::json_sax_t +{ + public: + explicit SaxCountdown(const int count) : events_left(count) + {} + + bool null() override + { + return events_left-- > 0; + } + + bool boolean(bool) override + { + return events_left-- > 0; + } + + bool number_integer(json::number_integer_t) override + { + return events_left-- > 0; + } + + bool number_unsigned(json::number_unsigned_t) override + { + return events_left-- > 0; + } + + bool number_float(json::number_float_t, const std::string&) override + { + return events_left-- > 0; + } + + bool string(std::string&&) override + { + return events_left-- > 0; + } + + bool start_object(std::size_t) override + { + return events_left-- > 0; + } + + bool key(std::string&&) override + { + return events_left-- > 0; + } + + bool end_object() override + { + return events_left-- > 0; + } + + bool start_array(std::size_t) override + { + return events_left-- > 0; + } + + bool end_array() override + { + return events_left-- > 0; + } + + bool parse_error(std::size_t, const std::string&, const json::exception&) override + { + return false; + } + + private: + int events_left = 0; +}; + TEST_CASE("MessagePack") { SECTION("individual 
values") @@ -1200,6 +1270,30 @@ TEST_CASE("MessagePack") } } } + + SECTION("SAX aborts") + { + SECTION("start_array(len)") + { + std::vector v = {0x93, 0x01, 0x02, 0x03}; + SaxCountdown scp(0); + CHECK(not json::sax_parse(v, &scp, json::input_format_t::msgpack)); + } + + SECTION("start_object(len)") + { + std::vector v = {0x81, 0xa3, 0x66, 0x6F, 0x6F, 0xc2}; + SaxCountdown scp(0); + CHECK(not json::sax_parse(v, &scp, json::input_format_t::msgpack)); + } + + SECTION("key()") + { + std::vector v = {0x81, 0xa3, 0x66, 0x6F, 0x6F, 0xc2}; + SaxCountdown scp(1); + CHECK(not json::sax_parse(v, &scp, json::input_format_t::msgpack)); + } + } } diff --git a/test/src/unit-ubjson.cpp b/test/src/unit-ubjson.cpp index 11774268..7f92d969 100644 --- a/test/src/unit-ubjson.cpp +++ b/test/src/unit-ubjson.cpp @@ -33,6 +33,76 @@ using nlohmann::json; #include +class SaxCountdown : public nlohmann::json::json_sax_t +{ + public: + explicit SaxCountdown(const int count) : events_left(count) + {} + + bool null() override + { + return events_left-- > 0; + } + + bool boolean(bool) override + { + return events_left-- > 0; + } + + bool number_integer(json::number_integer_t) override + { + return events_left-- > 0; + } + + bool number_unsigned(json::number_unsigned_t) override + { + return events_left-- > 0; + } + + bool number_float(json::number_float_t, const std::string&) override + { + return events_left-- > 0; + } + + bool string(std::string&&) override + { + return events_left-- > 0; + } + + bool start_object(std::size_t) override + { + return events_left-- > 0; + } + + bool key(std::string&&) override + { + return events_left-- > 0; + } + + bool end_object() override + { + return events_left-- > 0; + } + + bool start_array(std::size_t) override + { + return events_left-- > 0; + } + + bool end_array() override + { + return events_left-- > 0; + } + + bool parse_error(std::size_t, const std::string&, const json::exception&) override + { + return false; + } + + private: + int events_left = 
0; +}; + TEST_CASE("UBJSON") { SECTION("individual values") @@ -1241,6 +1311,51 @@ TEST_CASE("UBJSON") } } + SECTION("SAX aborts") + { + SECTION("start_array()") + { + std::vector v = {'[', 'T', 'F', ']'}; + SaxCountdown scp(0); + CHECK(not json::sax_parse(v, &scp, json::input_format_t::ubjson)); + } + + SECTION("start_object()") + { + std::vector v = {'{', 'i', 3, 'f', 'o', 'o', 'F', '}'}; + SaxCountdown scp(0); + CHECK(not json::sax_parse(v, &scp, json::input_format_t::ubjson)); + } + + SECTION("key() in object") + { + std::vector v = {'{', 'i', 3, 'f', 'o', 'o', 'F', '}'}; + SaxCountdown scp(1); + CHECK(not json::sax_parse(v, &scp, json::input_format_t::ubjson)); + } + + SECTION("start_array(len)") + { + std::vector v = {'[', '#', 'i', '2', 'T', 'F'}; + SaxCountdown scp(0); + CHECK(not json::sax_parse(v, &scp, json::input_format_t::ubjson)); + } + + SECTION("start_object(len)") + { + std::vector v = {'{', '#', 'i', '1', 3, 'f', 'o', 'o', 'F'}; + SaxCountdown scp(0); + CHECK(not json::sax_parse(v, &scp, json::input_format_t::ubjson)); + } + + SECTION("key() in object with length") + { + std::vector v = {'{', 'i', 3, 'f', 'o', 'o', 'F', '}'}; + SaxCountdown scp(1); + CHECK(not json::sax_parse(v, &scp, json::input_format_t::ubjson)); + } + } + SECTION("parsing values") { SECTION("strings") From 2537677e4ca608b667d120f2f136b0f301081a93 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 20 Mar 2018 23:40:01 +0100 Subject: [PATCH 32/43] :white_check_mark: improved test coverage --- test/src/unit-ubjson.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/src/unit-ubjson.cpp b/test/src/unit-ubjson.cpp index 7f92d969..1ccb9e22 100644 --- a/test/src/unit-ubjson.cpp +++ b/test/src/unit-ubjson.cpp @@ -1664,6 +1664,16 @@ TEST_CASE("UBJSON") CHECK_THROWS_AS(json::from_ubjson(v3), json::parse_error&); CHECK_THROWS_WITH(json::from_ubjson(v3), "[json.exception.parse_error.110] parse error at 5: unexpected end of input"); CHECK(json::from_ubjson(v3, true, 
false).is_discarded()); + + std::vector vST1 = {'{', '$', 'd', '#', 'i', 2, 'i', 1, 'a'}; + CHECK_THROWS_AS(json::from_ubjson(vST1), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(vST1), "[json.exception.parse_error.110] parse error at 10: unexpected end of input"); + CHECK(json::from_ubjson(vST1, true, false).is_discarded()); + + std::vector vST2 = {'{', '#', 'i', 2, 'i', 1, 'a'}; + CHECK_THROWS_AS(json::from_ubjson(vST2), json::parse_error&); + CHECK_THROWS_WITH(json::from_ubjson(vST2), "[json.exception.parse_error.110] parse error at 8: unexpected end of input"); + CHECK(json::from_ubjson(vST2, true, false).is_discarded()); } } From 4f6b2b6429c71ffab9cbd2ce8ac16a734a1a562b Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 21 Mar 2018 20:12:06 +0100 Subject: [PATCH 33/43] :hammer: changed SAX interface --- .../nlohmann/detail/input/binary_reader.hpp | 38 +++++----- include/nlohmann/detail/input/json_sax.hpp | 18 ++--- include/nlohmann/detail/input/lexer.hpp | 4 +- include/nlohmann/detail/input/parser.hpp | 12 ++-- single_include/nlohmann/json.hpp | 72 ++++++++++--------- test/src/unit-cbor.cpp | 4 +- test/src/unit-class_parser.cpp | 8 +-- test/src/unit-deserialization.cpp | 6 +- test/src/unit-msgpack.cpp | 4 +- test/src/unit-ubjson.cpp | 8 +-- 10 files changed, 91 insertions(+), 83 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 0feb6dd4..e4e1f176 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -267,7 +267,7 @@ class binary_reader case 0x7F: // UTF-8 string (indefinite length) { string_t s; - return get_cbor_string(s) and sax->string(std::move(s)); + return get_cbor_string(s) and sax->string(s); } // array (0x00..0x17 data items follow) @@ -663,7 +663,7 @@ class binary_reader case 0xBF: { string_t s; - return get_msgpack_string(s) and sax->string(std::move(s)); + return get_msgpack_string(s) and 
sax->string(s); } case 0xC0: // nil @@ -740,7 +740,7 @@ class binary_reader case 0xDB: // str 32 { string_t s; - return get_msgpack_string(s) and sax->string(std::move(s)); + return get_msgpack_string(s) and sax->string(s); } case 0xDC: // array 16 @@ -1062,13 +1062,13 @@ class binary_reader return false; } + string_t key; if (len != json_sax_t::no_limit) { for (std::size_t i = 0; i < len; ++i) { get(); - string_t key; - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(std::move(key)))) + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) { return false; } @@ -1077,14 +1077,14 @@ class binary_reader { return false; } + key.clear(); } } else { while (get() != 0xFF) { - string_t key; - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(std::move(key)))) + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) { return false; } @@ -1093,6 +1093,7 @@ class binary_reader { return false; } + key.clear(); } } @@ -1214,11 +1215,11 @@ class binary_reader return false; } + string_t key; for (std::size_t i = 0; i < len; ++i) { get(); - string_t key; - if (JSON_UNLIKELY(not get_msgpack_string(key) or not sax->key(std::move(key)))) + if (JSON_UNLIKELY(not get_msgpack_string(key) or not sax->key(key))) { return false; } @@ -1227,6 +1228,7 @@ class binary_reader { return false; } + key.clear(); } return sax->end_object(); @@ -1485,13 +1487,14 @@ class binary_reader auto last_token = get_token_string(); return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token)); } - return sax->string(string_t(1, static_cast(current))); + string_t s(1, static_cast(current)); + return sax->string(s); } case 'S': // string { string_t s; - return get_ubjson_string(s) and sax->string(std::move(s)); + return get_ubjson_string(s) and sax->string(s); } case '[': // array @@ -1581,6 +1584,7 @@ class binary_reader return false; } + string_t key; if 
(size_and_type.first != string_t::npos) { if (JSON_UNLIKELY(not sax->start_object(size_and_type.first))) @@ -1592,8 +1596,7 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - string_t key; - if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(std::move(key)))) + if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(key))) { return false; } @@ -1601,14 +1604,14 @@ class binary_reader { return false; } + key.clear(); } } else { for (std::size_t i = 0; i < size_and_type.first; ++i) { - string_t key; - if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(std::move(key)))) + if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(key))) { return false; } @@ -1616,6 +1619,7 @@ class binary_reader { return false; } + key.clear(); } } } @@ -1628,8 +1632,7 @@ class binary_reader while (current != '}') { - string_t key; - if (JSON_UNLIKELY(not get_ubjson_string(key, false) or not sax->key(std::move(key)))) + if (JSON_UNLIKELY(not get_ubjson_string(key, false) or not sax->key(key))) { return false; } @@ -1638,6 +1641,7 @@ class binary_reader return false; } get_ignore_noop(); + key.clear(); } } diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index 98479eb9..eb92731f 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -68,7 +68,7 @@ struct json_sax @param[in] val string value @return whether parsing should proceed */ - virtual bool string(string_t&& val) = 0; + virtual bool string(string_t& val) = 0; /*! @brief the beginning of an object was read @@ -83,7 +83,7 @@ struct json_sax @param[in] val object key @return whether parsing should proceed */ - virtual bool key(string_t&& val) = 0; + virtual bool key(string_t& val) = 0; /*! 
@brief the end of an object was read @@ -165,7 +165,7 @@ class json_sax_dom_parser : public json_sax return true; } - bool string(string_t&& val) override + bool string(string_t& val) override { handle_value(val); return true; @@ -184,7 +184,7 @@ class json_sax_dom_parser : public json_sax return true; } - bool key(string_t&& val) override + bool key(string_t& val) override { // add null at given key and store the reference for later object_element = &(ref_stack.back()->m_value.object->operator[](val)); @@ -340,7 +340,7 @@ class json_sax_dom_callback_parser : public json_sax return true; } - bool string(string_t&& val) override + bool string(string_t& val) override { handle_value(val); return true; @@ -362,9 +362,9 @@ class json_sax_dom_callback_parser : public json_sax return true; } - bool key(string_t&& val) override + bool key(string_t& val) override { - BasicJsonType k = BasicJsonType(std::forward < string_t&& > (val)); + BasicJsonType k = BasicJsonType(val); const bool keep = callback(ref_stack.size(), parse_event_t::key, k); // add null at given key and store the reference for later @@ -531,7 +531,7 @@ class json_sax_acceptor : public json_sax return true; } - bool string(string_t&&) override + bool string(string_t&) override { return true; } @@ -541,7 +541,7 @@ class json_sax_acceptor : public json_sax return true; } - bool key(string_t&&) override + bool key(string_t&) override { return true; } diff --git a/include/nlohmann/detail/input/lexer.hpp b/include/nlohmann/detail/input/lexer.hpp index 20c1b3fe..1ed35617 100644 --- a/include/nlohmann/detail/input/lexer.hpp +++ b/include/nlohmann/detail/input/lexer.hpp @@ -1131,9 +1131,9 @@ scan_number_done: } /// return current string value (implicitly resets the token; useful only once) - string_t&& move_string() + string_t& get_string() { - return std::move(token_buffer); + return token_buffer; } ///////////////////// diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp 
index a38101c9..d8f1ff34 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -239,7 +239,7 @@ class parser { return; } - key = m_lexer.move_string(); + key = m_lexer.get_string(); bool keep_tag = false; if (keep) @@ -375,7 +375,7 @@ class parser case token_type::value_string: { result.m_type = value_t::string; - result.m_value = m_lexer.move_string(); + result.m_value = m_lexer.get_string(); break; } @@ -498,7 +498,7 @@ class parser } else { - if (JSON_UNLIKELY(not sax->key(m_lexer.move_string()))) + if (JSON_UNLIKELY(not sax->key(m_lexer.get_string()))) { return false; } @@ -560,7 +560,7 @@ class parser } else { - if (JSON_UNLIKELY(not sax->number_float(res, m_lexer.move_string()))) + if (JSON_UNLIKELY(not sax->number_float(res, m_lexer.get_string()))) { return false; } @@ -606,7 +606,7 @@ class parser case token_type::value_string: { - if (JSON_UNLIKELY(not sax->string(m_lexer.move_string()))) + if (JSON_UNLIKELY(not sax->string(m_lexer.get_string()))) { return false; } @@ -706,7 +706,7 @@ class parser } else { - if (JSON_UNLIKELY(not sax->key(m_lexer.move_string()))) + if (JSON_UNLIKELY(not sax->key(m_lexer.get_string()))) { return false; } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 55e1e01b..de1f6bfe 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -2973,9 +2973,9 @@ scan_number_done: } /// return current string value (implicitly resets the token; useful only once) - string_t&& move_string() + string_t& get_string() { - return std::move(token_buffer); + return token_buffer; } ///////////////////// @@ -3208,7 +3208,7 @@ struct json_sax @param[in] val string value @return whether parsing should proceed */ - virtual bool string(string_t&& val) = 0; + virtual bool string(string_t& val) = 0; /*! 
@brief the beginning of an object was read @@ -3223,7 +3223,7 @@ struct json_sax @param[in] val object key @return whether parsing should proceed */ - virtual bool key(string_t&& val) = 0; + virtual bool key(string_t& val) = 0; /*! @brief the end of an object was read @@ -3305,7 +3305,7 @@ class json_sax_dom_parser : public json_sax return true; } - bool string(string_t&& val) override + bool string(string_t& val) override { handle_value(val); return true; @@ -3324,7 +3324,7 @@ class json_sax_dom_parser : public json_sax return true; } - bool key(string_t&& val) override + bool key(string_t& val) override { // add null at given key and store the reference for later object_element = &(ref_stack.back()->m_value.object->operator[](val)); @@ -3480,7 +3480,7 @@ class json_sax_dom_callback_parser : public json_sax return true; } - bool string(string_t&& val) override + bool string(string_t& val) override { handle_value(val); return true; @@ -3502,9 +3502,9 @@ class json_sax_dom_callback_parser : public json_sax return true; } - bool key(string_t&& val) override + bool key(string_t& val) override { - BasicJsonType k = BasicJsonType(std::forward < string_t&& > (val)); + BasicJsonType k = BasicJsonType(val); const bool keep = callback(ref_stack.size(), parse_event_t::key, k); // add null at given key and store the reference for later @@ -3671,7 +3671,7 @@ class json_sax_acceptor : public json_sax return true; } - bool string(string_t&&) override + bool string(string_t&) override { return true; } @@ -3681,7 +3681,7 @@ class json_sax_acceptor : public json_sax return true; } - bool key(string_t&&) override + bool key(string_t&) override { return true; } @@ -3941,7 +3941,7 @@ class parser { return; } - key = m_lexer.move_string(); + key = m_lexer.get_string(); bool keep_tag = false; if (keep) @@ -4077,7 +4077,7 @@ class parser case token_type::value_string: { result.m_type = value_t::string; - result.m_value = m_lexer.move_string(); + result.m_value = m_lexer.get_string(); 
break; } @@ -4200,7 +4200,7 @@ class parser } else { - if (JSON_UNLIKELY(not sax->key(m_lexer.move_string()))) + if (JSON_UNLIKELY(not sax->key(m_lexer.get_string()))) { return false; } @@ -4262,7 +4262,7 @@ class parser } else { - if (JSON_UNLIKELY(not sax->number_float(res, m_lexer.move_string()))) + if (JSON_UNLIKELY(not sax->number_float(res, m_lexer.get_string()))) { return false; } @@ -4308,7 +4308,7 @@ class parser case token_type::value_string: { - if (JSON_UNLIKELY(not sax->string(m_lexer.move_string()))) + if (JSON_UNLIKELY(not sax->string(m_lexer.get_string()))) { return false; } @@ -4408,7 +4408,7 @@ class parser } else { - if (JSON_UNLIKELY(not sax->key(m_lexer.move_string()))) + if (JSON_UNLIKELY(not sax->key(m_lexer.get_string()))) { return false; } @@ -5907,7 +5907,7 @@ class binary_reader case 0x7F: // UTF-8 string (indefinite length) { string_t s; - return get_cbor_string(s) and sax->string(std::move(s)); + return get_cbor_string(s) and sax->string(s); } // array (0x00..0x17 data items follow) @@ -6303,7 +6303,7 @@ class binary_reader case 0xBF: { string_t s; - return get_msgpack_string(s) and sax->string(std::move(s)); + return get_msgpack_string(s) and sax->string(s); } case 0xC0: // nil @@ -6380,7 +6380,7 @@ class binary_reader case 0xDB: // str 32 { string_t s; - return get_msgpack_string(s) and sax->string(std::move(s)); + return get_msgpack_string(s) and sax->string(s); } case 0xDC: // array 16 @@ -6702,13 +6702,13 @@ class binary_reader return false; } + string_t key; if (len != json_sax_t::no_limit) { for (std::size_t i = 0; i < len; ++i) { get(); - string_t key; - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(std::move(key)))) + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) { return false; } @@ -6717,14 +6717,14 @@ class binary_reader { return false; } + key.clear(); } } else { while (get() != 0xFF) { - string_t key; - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(std::move(key)))) + if 
(JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) { return false; } @@ -6733,6 +6733,7 @@ class binary_reader { return false; } + key.clear(); } } @@ -6854,11 +6855,11 @@ class binary_reader return false; } + string_t key; for (std::size_t i = 0; i < len; ++i) { get(); - string_t key; - if (JSON_UNLIKELY(not get_msgpack_string(key) or not sax->key(std::move(key)))) + if (JSON_UNLIKELY(not get_msgpack_string(key) or not sax->key(key))) { return false; } @@ -6867,6 +6868,7 @@ class binary_reader { return false; } + key.clear(); } return sax->end_object(); @@ -7125,13 +7127,14 @@ class binary_reader auto last_token = get_token_string(); return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token)); } - return sax->string(string_t(1, static_cast(current))); + string_t s(1, static_cast(current)); + return sax->string(s); } case 'S': // string { string_t s; - return get_ubjson_string(s) and sax->string(std::move(s)); + return get_ubjson_string(s) and sax->string(s); } case '[': // array @@ -7221,6 +7224,7 @@ class binary_reader return false; } + string_t key; if (size_and_type.first != string_t::npos) { if (JSON_UNLIKELY(not sax->start_object(size_and_type.first))) @@ -7232,8 +7236,7 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - string_t key; - if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(std::move(key)))) + if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(key))) { return false; } @@ -7241,14 +7244,14 @@ class binary_reader { return false; } + key.clear(); } } else { for (std::size_t i = 0; i < size_and_type.first; ++i) { - string_t key; - if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(std::move(key)))) + if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(key))) { return false; } @@ -7256,6 +7259,7 @@ class binary_reader { return false; } + key.clear(); } } } @@ -7268,8 
+7272,7 @@ class binary_reader while (current != '}') { - string_t key; - if (JSON_UNLIKELY(not get_ubjson_string(key, false) or not sax->key(std::move(key)))) + if (JSON_UNLIKELY(not get_ubjson_string(key, false) or not sax->key(key))) { return false; } @@ -7278,6 +7281,7 @@ class binary_reader return false; } get_ignore_noop(); + key.clear(); } } diff --git a/test/src/unit-cbor.cpp b/test/src/unit-cbor.cpp index cd8fb0db..29879508 100644 --- a/test/src/unit-cbor.cpp +++ b/test/src/unit-cbor.cpp @@ -64,7 +64,7 @@ class SaxCountdown : public nlohmann::json::json_sax_t return events_left-- > 0; } - bool string(std::string&&) override + bool string(std::string&) override { return events_left-- > 0; } @@ -74,7 +74,7 @@ class SaxCountdown : public nlohmann::json::json_sax_t return events_left-- > 0; } - bool key(std::string&&) override + bool key(std::string&) override { return events_left-- > 0; } diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index 1e100568..e4cfb536 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -67,7 +67,7 @@ class SaxEventLogger : public nlohmann::json::json_sax_t return true; } - bool string(std::string&& val) override + bool string(std::string& val) override { events.push_back("string(" + val + ")"); return true; @@ -86,7 +86,7 @@ class SaxEventLogger : public nlohmann::json::json_sax_t return true; } - bool key(std::string&& val) override + bool key(std::string& val) override { events.push_back("key(" + val + ")"); return true; @@ -159,7 +159,7 @@ class SaxCountdown : public nlohmann::json::json_sax_t return events_left-- > 0; } - bool string(std::string&&) override + bool string(std::string&) override { return events_left-- > 0; } @@ -169,7 +169,7 @@ class SaxCountdown : public nlohmann::json::json_sax_t return events_left-- > 0; } - bool key(std::string&&) override + bool key(std::string&) override { return events_left-- > 0; } diff --git a/test/src/unit-deserialization.cpp 
b/test/src/unit-deserialization.cpp index 5aa3a8b6..5c9fde5a 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -66,7 +66,7 @@ struct SaxEventLogger : public nlohmann::json::json_sax_t return true; } - bool string(std::string&& val) override + bool string(std::string& val) override { events.push_back("string(" + val + ")"); return true; @@ -85,7 +85,7 @@ struct SaxEventLogger : public nlohmann::json::json_sax_t return true; } - bool key(std::string&& val) override + bool key(std::string& val) override { events.push_back("key(" + val + ")"); return true; @@ -143,7 +143,7 @@ struct SaxEventLoggerExitAfterStartObject : public SaxEventLogger struct SaxEventLoggerExitAfterKey : public SaxEventLogger { - bool key(std::string&& val) override + bool key(std::string& val) override { events.push_back("key(" + val + ")"); return false; diff --git a/test/src/unit-msgpack.cpp b/test/src/unit-msgpack.cpp index 6974c530..7789ccb7 100644 --- a/test/src/unit-msgpack.cpp +++ b/test/src/unit-msgpack.cpp @@ -64,7 +64,7 @@ class SaxCountdown : public nlohmann::json::json_sax_t return events_left-- > 0; } - bool string(std::string&&) override + bool string(std::string&) override { return events_left-- > 0; } @@ -74,7 +74,7 @@ class SaxCountdown : public nlohmann::json::json_sax_t return events_left-- > 0; } - bool key(std::string&&) override + bool key(std::string&) override { return events_left-- > 0; } diff --git a/test/src/unit-ubjson.cpp b/test/src/unit-ubjson.cpp index 1ccb9e22..380e0f3f 100644 --- a/test/src/unit-ubjson.cpp +++ b/test/src/unit-ubjson.cpp @@ -64,7 +64,7 @@ class SaxCountdown : public nlohmann::json::json_sax_t return events_left-- > 0; } - bool string(std::string&&) override + bool string(std::string&) override { return events_left-- > 0; } @@ -74,7 +74,7 @@ class SaxCountdown : public nlohmann::json::json_sax_t return events_left-- > 0; } - bool key(std::string&&) override + bool key(std::string&) override { return 
events_left-- > 0; } @@ -1668,12 +1668,12 @@ TEST_CASE("UBJSON") std::vector vST1 = {'{', '$', 'd', '#', 'i', 2, 'i', 1, 'a'}; CHECK_THROWS_AS(json::from_ubjson(vST1), json::parse_error&); CHECK_THROWS_WITH(json::from_ubjson(vST1), "[json.exception.parse_error.110] parse error at 10: unexpected end of input"); - CHECK(json::from_ubjson(vST1, true, false).is_discarded()); + CHECK(json::from_ubjson(vST1, true, false).is_discarded()); std::vector vST2 = {'{', '#', 'i', 2, 'i', 1, 'a'}; CHECK_THROWS_AS(json::from_ubjson(vST2), json::parse_error&); CHECK_THROWS_WITH(json::from_ubjson(vST2), "[json.exception.parse_error.110] parse error at 8: unexpected end of input"); - CHECK(json::from_ubjson(vST2, true, false).is_discarded()); + CHECK(json::from_ubjson(vST2, true, false).is_discarded()); } } From 5f723bbec65b37b604a7860227e753a58cd97255 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 28 Mar 2018 23:39:39 +0200 Subject: [PATCH 34/43] :hammer: realized callback parser wirh SAX interface #971 --- include/nlohmann/detail/input/json_sax.hpp | 153 +++++-- include/nlohmann/detail/input/parser.hpp | 318 -------------- single_include/nlohmann/json.hpp | 471 ++++++--------------- test/src/unit-ubjson.cpp | 29 ++ 4 files changed, 273 insertions(+), 698 deletions(-) diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index eb92731f..0bc628cc 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -242,7 +242,7 @@ class json_sax_dom_parser : public json_sax return false; } - bool is_errored() const + constexpr bool is_errored() const { return errored; } @@ -303,7 +303,7 @@ class json_sax_dom_callback_parser : public json_sax using parse_event_t = typename BasicJsonType::parse_event_t; json_sax_dom_callback_parser(BasicJsonType& r, - const parser_callback_t cb = nullptr, + const parser_callback_t cb, const bool allow_exceptions_ = true) : root(r), callback(cb), 
allow_exceptions(allow_exceptions_) { @@ -348,15 +348,21 @@ class json_sax_dom_callback_parser : public json_sax bool start_object(std::size_t len) override { - const bool keep = callback(ref_stack.size() + 1, parse_event_t::object_start, discarded); + // check callback for object start + const bool keep = callback(static_cast(ref_stack.size()) + 1, parse_event_t::object_start, discarded); keep_stack.push_back(keep); - ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); + auto val = handle_value(BasicJsonType::value_t::object); + ref_stack.push_back(val.second); - if (JSON_UNLIKELY(len != json_sax::no_limit and len > ref_stack.back()->max_size())) + // check object limit + if (ref_stack.back()) { - JSON_THROW(out_of_range::create(408, - "excessive object size: " + std::to_string(len))); + if (JSON_UNLIKELY(len != json_sax::no_limit and len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, + "excessive object size: " + std::to_string(len))); + } } return true; @@ -365,38 +371,73 @@ class json_sax_dom_callback_parser : public json_sax bool key(string_t& val) override { BasicJsonType k = BasicJsonType(val); - const bool keep = callback(ref_stack.size(), parse_event_t::key, k); - // add null at given key and store the reference for later - object_element = &(ref_stack.back()->m_value.object->operator[](val)); + // check callback for key + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::key, k); + key_keep_stack.push_back(keep); + + // add discarded value at given key and store the reference for later + if (keep and ref_stack.back()) + { + object_element = &(ref_stack.back()->m_value.object->operator[](val) = discarded); + } + return true; } bool end_object() override { - const bool keep = callback(ref_stack.size() - 1, parse_event_t::object_end, *ref_stack.back()); - if (not keep) + bool keep = true; + if (ref_stack.back()) { - // discard object - *ref_stack.back() = discarded; + keep = 
callback(static_cast(ref_stack.size()) - 1, parse_event_t::object_end, *ref_stack.back()); + if (not keep) + { + // discard object + *ref_stack.back() = discarded; + } } + assert(not ref_stack.empty()); + assert(not keep_stack.empty()); ref_stack.pop_back(); keep_stack.pop_back(); + + if (not ref_stack.empty() and ref_stack.back()) + { + // remove discarded value + if (ref_stack.back()->is_object()) + { + for (auto it = ref_stack.back()->begin(); it != ref_stack.back()->end(); ++it) + { + if (it->is_discarded()) + { + ref_stack.back()->erase(it); + break; + } + } + } + } + return true; } bool start_array(std::size_t len) override { - const bool keep = callback(ref_stack.size() + 1, parse_event_t::array_start, discarded); + const bool keep = callback(static_cast(ref_stack.size()) + 1, parse_event_t::array_start, discarded); keep_stack.push_back(keep); - ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); + auto val = handle_value(BasicJsonType::value_t::array); + ref_stack.push_back(val.second); - if (JSON_UNLIKELY(len != json_sax::no_limit and len > ref_stack.back()->max_size())) + // check array limit + if (ref_stack.back()) { - JSON_THROW(out_of_range::create(408, - "excessive array size: " + std::to_string(len))); + if (JSON_UNLIKELY(len != json_sax::no_limit and len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, + "excessive array size: " + std::to_string(len))); + } } return true; @@ -404,15 +445,32 @@ class json_sax_dom_callback_parser : public json_sax bool end_array() override { - const bool keep = callback(ref_stack.size() - 1, parse_event_t::array_end, *ref_stack.back()); - if (not keep) + bool keep = true; + + if (ref_stack.back()) { - // discard array - *ref_stack.back() = discarded; + keep = callback(static_cast(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); + if (not keep) + { + // discard array + *ref_stack.back() = discarded; + } } + assert(not ref_stack.empty()); + assert(not 
keep_stack.empty()); ref_stack.pop_back(); keep_stack.pop_back(); + + // remove discarded value + if (not keep and not ref_stack.empty()) + { + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_value.array->pop_back(); + } + } + return true; } @@ -442,7 +500,7 @@ class json_sax_dom_callback_parser : public json_sax return false; } - bool is_errored() const + constexpr bool is_errored() const { return errored; } @@ -453,28 +511,59 @@ class json_sax_dom_callback_parser : public json_sax root. @invariant If the ref stack contains a value, then it is an array or an object to which we can add elements + @return pair of boolean (whether value should be kept) and pointer (to the + passed value in the ref_stack hierarchy; nullptr if not kept) */ template - BasicJsonType* handle_value(Value&& v) + std::pair handle_value(Value&& v) { + assert(not keep_stack.empty()); + + // do not handle this value if we know it would be added to a discarded + // container + if (not keep_stack.back()) + { + return {false, nullptr}; + } + + // create value and check callback + auto value = BasicJsonType(std::forward(v)); + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::value, value); + + // do not handle this value if we just learnt it shall be discarded + if (not keep) + { + return {false, nullptr}; + } + if (ref_stack.empty()) { - root = BasicJsonType(std::forward(v)); - return &root; + root = std::move(value); + return {true, &root}; } else { assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); if (ref_stack.back()->is_array()) { - ref_stack.back()->m_value.array->emplace_back(std::forward(v)); - return &(ref_stack.back()->m_value.array->back()); + ref_stack.back()->m_value.array->push_back(std::move(value)); + return {true, &(ref_stack.back()->m_value.array->back())}; } else { + // check if we should store an element for the current key + assert(not key_keep_stack.empty()); + const bool store_element = key_keep_stack.back(); + 
key_keep_stack.pop_back(); + + if (not store_element) + { + return {false, nullptr}; + } + assert(object_element); - *object_element = BasicJsonType(std::forward(v)); - return object_element; + *object_element = std::move(value); + return {true, object_element}; } } } @@ -485,6 +574,8 @@ class json_sax_dom_callback_parser : public json_sax std::vector ref_stack; /// stack to manage which values to keep std::vector keep_stack; + /// stack to manage which object keys to keep + std::vector key_keep_stack; /// helper to hold the reference for the next object element BasicJsonType* object_element = nullptr; /// whether a syntax error occurred diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index d8f1ff34..de16ef05 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -83,7 +83,6 @@ class parser { if (callback) { - /* json_sax_dom_callback_parser sdp(result, callback, allow_exceptions); sax_parse_internal(&sdp); result.assert_invariant(); @@ -102,24 +101,6 @@ class parser result = value_t::discarded; return; } - */ - - parse_internal(true, result); - result.assert_invariant(); - - // in strict mode, input must be completely read - if (strict) - { - get_token(); - expect(token_type::end_of_input); - } - - // in case of an error, return discarded value - if (errored) - { - result = value_t::discarded; - return; - } // set top-level value to null if it was discarded by the callback // function @@ -179,280 +160,6 @@ class parser } private: - /*! 
- @brief the actual parser - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - */ - void parse_internal(bool keep, BasicJsonType& result) - { - // never parse after a parse error was detected - assert(not errored); - // this function is only called when a callback is given - assert(callback); - - // start with a discarded value - if (not result.is_discarded()) - { - result.m_value.destroy(result.m_type); - result.m_type = value_t::discarded; - } - - switch (last_token) - { - case token_type::begin_object: - { - if (keep) - { - keep = callback(depth++, parse_event_t::object_start, result); - - if (keep) - { - // explicitly set result to object to cope with {} - result.m_type = value_t::object; - result.m_value = value_t::object; - } - } - - // read next token - get_token(); - - // closing } -> we are done - if (last_token == token_type::end_object) - { - if (keep and not callback(--depth, parse_event_t::object_end, result)) - { - result.m_value.destroy(result.m_type); - result.m_type = value_t::discarded; - } - break; - } - - // parse values - string_t key; - BasicJsonType value; - while (true) - { - // store key - if (not expect(token_type::value_string)) - { - return; - } - key = m_lexer.get_string(); - - bool keep_tag = false; - if (keep) - { - BasicJsonType k(key); - keep_tag = callback(depth, parse_event_t::key, k); - } - - // parse separator (:) - get_token(); - if (not expect(token_type::name_separator)) - { - return; - } - - // parse and add value - get_token(); - value.m_value.destroy(value.m_type); - value.m_type = value_t::discarded; - parse_internal(keep, value); - - if (JSON_UNLIKELY(errored)) - { - return; - } - - if (keep and keep_tag and not value.is_discarded()) - { - result.m_value.object->emplace(std::move(key), std::move(value)); - } - - // comma -> next value - get_token(); - if (last_token == token_type::value_separator) - { - 
get_token(); - continue; - } - - // closing } - if (not expect(token_type::end_object)) - { - return; - } - break; - } - - if (keep and not callback(--depth, parse_event_t::object_end, result)) - { - result.m_value.destroy(result.m_type); - result.m_type = value_t::discarded; - } - break; - } - - case token_type::begin_array: - { - if (keep) - { - keep = callback(depth++, parse_event_t::array_start, result); - - if (keep) - { - // explicitly set result to array to cope with [] - result.m_type = value_t::array; - result.m_value = value_t::array; - } - } - - // read next token - get_token(); - - // closing ] -> we are done - if (last_token == token_type::end_array) - { - if (not callback(--depth, parse_event_t::array_end, result)) - { - result.m_value.destroy(result.m_type); - result.m_type = value_t::discarded; - } - break; - } - - // parse values - BasicJsonType value; - while (true) - { - // parse value - value.m_value.destroy(value.m_type); - value.m_type = value_t::discarded; - parse_internal(keep, value); - - if (JSON_UNLIKELY(errored)) - { - return; - } - - if (keep and not value.is_discarded()) - { - result.m_value.array->push_back(std::move(value)); - } - - // comma -> next value - get_token(); - if (last_token == token_type::value_separator) - { - get_token(); - continue; - } - - // closing ] - if (not expect(token_type::end_array)) - { - return; - } - break; - } - - if (keep and not callback(--depth, parse_event_t::array_end, result)) - { - result.m_value.destroy(result.m_type); - result.m_type = value_t::discarded; - } - break; - } - - case token_type::literal_null: - { - result.m_type = value_t::null; - break; - } - - case token_type::value_string: - { - result.m_type = value_t::string; - result.m_value = m_lexer.get_string(); - break; - } - - case token_type::literal_true: - { - result.m_type = value_t::boolean; - result.m_value = true; - break; - } - - case token_type::literal_false: - { - result.m_type = value_t::boolean; - result.m_value = false; - 
break; - } - - case token_type::value_unsigned: - { - result.m_type = value_t::number_unsigned; - result.m_value = m_lexer.get_number_unsigned(); - break; - } - - case token_type::value_integer: - { - result.m_type = value_t::number_integer; - result.m_value = m_lexer.get_number_integer(); - break; - } - - case token_type::value_float: - { - result.m_type = value_t::number_float; - result.m_value = m_lexer.get_number_float(); - - // throw in case of infinity or NAN - if (JSON_UNLIKELY(not std::isfinite(result.m_value.number_float))) - { - if (allow_exceptions) - { - JSON_THROW(out_of_range::create(406, "number overflow parsing '" + - m_lexer.get_token_string() + "'")); - } - expect(token_type::uninitialized); - } - break; - } - - case token_type::parse_error: - { - // using "uninitialized" to avoid "expected" message - if (not expect(token_type::uninitialized)) - { - return; - } - break; // LCOV_EXCL_LINE - } - - default: - { - // the last token was unexpected; we expected a value - if (not expect(token_type::literal_or_value)) - { - return; - } - break; // LCOV_EXCL_LINE - } - } - - if (keep and not callback(depth, parse_event_t::value, result)) - { - result.m_value.destroy(result.m_type); - result.m_type = value_t::discarded; - } - } - bool sax_parse_internal(json_sax_t* sax) { // two values for the structured values @@ -762,27 +469,6 @@ class parser return (last_token = m_lexer.scan()); } - /*! 
- @throw parse_error.101 if expected token did not occur - */ - bool expect(token_type t) - { - if (JSON_UNLIKELY(t != last_token)) - { - errored = true; - if (allow_exceptions) - { - JSON_THROW(parse_error::create(101, m_lexer.get_position(), exception_message(t))); - } - else - { - return false; - } - } - - return true; - } - std::string exception_message(const token_type expected) { std::string error_msg = "syntax error - "; @@ -805,16 +491,12 @@ class parser } private: - /// current level of recursion - int depth = 0; /// callback function const parser_callback_t callback = nullptr; /// the type of the last read token token_type last_token = token_type::uninitialized; /// the lexer lexer_t m_lexer; - /// whether a syntax error occurred - bool errored = false; /// whether to throw exceptions in case of errors const bool allow_exceptions = true; }; diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index de1f6bfe..6fb619d6 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3382,7 +3382,7 @@ class json_sax_dom_parser : public json_sax return false; } - bool is_errored() const + constexpr bool is_errored() const { return errored; } @@ -3443,7 +3443,7 @@ class json_sax_dom_callback_parser : public json_sax using parse_event_t = typename BasicJsonType::parse_event_t; json_sax_dom_callback_parser(BasicJsonType& r, - const parser_callback_t cb = nullptr, + const parser_callback_t cb, const bool allow_exceptions_ = true) : root(r), callback(cb), allow_exceptions(allow_exceptions_) { @@ -3488,15 +3488,21 @@ class json_sax_dom_callback_parser : public json_sax bool start_object(std::size_t len) override { - const bool keep = callback(ref_stack.size() + 1, parse_event_t::object_start, discarded); + // check callback for object start + const bool keep = callback(static_cast(ref_stack.size()) + 1, parse_event_t::object_start, discarded); keep_stack.push_back(keep); - 
ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); + auto val = handle_value(BasicJsonType::value_t::object); + ref_stack.push_back(val.second); - if (JSON_UNLIKELY(len != json_sax::no_limit and len > ref_stack.back()->max_size())) + // check object limit + if (ref_stack.back()) { - JSON_THROW(out_of_range::create(408, - "excessive object size: " + std::to_string(len))); + if (JSON_UNLIKELY(len != json_sax::no_limit and len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, + "excessive object size: " + std::to_string(len))); + } } return true; @@ -3505,38 +3511,73 @@ class json_sax_dom_callback_parser : public json_sax bool key(string_t& val) override { BasicJsonType k = BasicJsonType(val); - const bool keep = callback(ref_stack.size(), parse_event_t::key, k); - // add null at given key and store the reference for later - object_element = &(ref_stack.back()->m_value.object->operator[](val)); + // check callback for key + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::key, k); + key_keep_stack.push_back(keep); + + // add discarded value at given key and store the reference for later + if (keep and ref_stack.back()) + { + object_element = &(ref_stack.back()->m_value.object->operator[](val) = discarded); + } + return true; } bool end_object() override { - const bool keep = callback(ref_stack.size() - 1, parse_event_t::object_end, *ref_stack.back()); - if (not keep) + bool keep = true; + if (ref_stack.back()) { - // discard object - *ref_stack.back() = discarded; + keep = callback(static_cast(ref_stack.size()) - 1, parse_event_t::object_end, *ref_stack.back()); + if (not keep) + { + // discard object + *ref_stack.back() = discarded; + } } + assert(not ref_stack.empty()); + assert(not keep_stack.empty()); ref_stack.pop_back(); keep_stack.pop_back(); + + if (not ref_stack.empty() and ref_stack.back()) + { + // remove discarded value + if (ref_stack.back()->is_object()) + { + for (auto it = 
ref_stack.back()->begin(); it != ref_stack.back()->end(); ++it) + { + if (it->is_discarded()) + { + ref_stack.back()->erase(it); + break; + } + } + } + } + return true; } bool start_array(std::size_t len) override { - const bool keep = callback(ref_stack.size() + 1, parse_event_t::array_start, discarded); + const bool keep = callback(static_cast(ref_stack.size()) + 1, parse_event_t::array_start, discarded); keep_stack.push_back(keep); - ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); + auto val = handle_value(BasicJsonType::value_t::array); + ref_stack.push_back(val.second); - if (JSON_UNLIKELY(len != json_sax::no_limit and len > ref_stack.back()->max_size())) + // check array limit + if (ref_stack.back()) { - JSON_THROW(out_of_range::create(408, - "excessive array size: " + std::to_string(len))); + if (JSON_UNLIKELY(len != json_sax::no_limit and len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, + "excessive array size: " + std::to_string(len))); + } } return true; @@ -3544,15 +3585,32 @@ class json_sax_dom_callback_parser : public json_sax bool end_array() override { - const bool keep = callback(ref_stack.size() - 1, parse_event_t::array_end, *ref_stack.back()); - if (not keep) + bool keep = true; + + if (ref_stack.back()) { - // discard array - *ref_stack.back() = discarded; + keep = callback(static_cast(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); + if (not keep) + { + // discard array + *ref_stack.back() = discarded; + } } + assert(not ref_stack.empty()); + assert(not keep_stack.empty()); ref_stack.pop_back(); keep_stack.pop_back(); + + // remove discarded value + if (not keep and not ref_stack.empty()) + { + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_value.array->pop_back(); + } + } + return true; } @@ -3582,7 +3640,7 @@ class json_sax_dom_callback_parser : public json_sax return false; } - bool is_errored() const + constexpr bool is_errored() const { return errored; } @@ 
-3593,28 +3651,59 @@ class json_sax_dom_callback_parser : public json_sax root. @invariant If the ref stack contains a value, then it is an array or an object to which we can add elements + @return pair of boolean (whether value should be kept) and pointer (to the + passed value in the ref_stack hierarchy; nullptr if not kept) */ template - BasicJsonType* handle_value(Value&& v) + std::pair handle_value(Value&& v) { + assert(not keep_stack.empty()); + + // do not handle this value if we know it would be added to a discarded + // container + if (not keep_stack.back()) + { + return {false, nullptr}; + } + + // create value and check callback + auto value = BasicJsonType(std::forward(v)); + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::value, value); + + // do not handle this value if we just learnt it shall be discarded + if (not keep) + { + return {false, nullptr}; + } + if (ref_stack.empty()) { - root = BasicJsonType(std::forward(v)); - return &root; + root = std::move(value); + return {true, &root}; } else { assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); if (ref_stack.back()->is_array()) { - ref_stack.back()->m_value.array->emplace_back(std::forward(v)); - return &(ref_stack.back()->m_value.array->back()); + ref_stack.back()->m_value.array->push_back(std::move(value)); + return {true, &(ref_stack.back()->m_value.array->back())}; } else { + // check if we should store an element for the current key + assert(not key_keep_stack.empty()); + const bool store_element = key_keep_stack.back(); + key_keep_stack.pop_back(); + + if (not store_element) + { + return {false, nullptr}; + } + assert(object_element); - *object_element = BasicJsonType(std::forward(v)); - return object_element; + *object_element = std::move(value); + return {true, object_element}; } } } @@ -3625,6 +3714,8 @@ class json_sax_dom_callback_parser : public json_sax std::vector ref_stack; /// stack to manage which values to keep std::vector keep_stack; + 
/// stack to manage which object keys to keep + std::vector key_keep_stack; /// helper to hold the reference for the next object element BasicJsonType* object_element = nullptr; /// whether a syntax error occurred @@ -3785,7 +3876,6 @@ class parser { if (callback) { - /* json_sax_dom_callback_parser sdp(result, callback, allow_exceptions); sax_parse_internal(&sdp); result.assert_invariant(); @@ -3804,24 +3894,6 @@ class parser result = value_t::discarded; return; } - */ - - parse_internal(true, result); - result.assert_invariant(); - - // in strict mode, input must be completely read - if (strict) - { - get_token(); - expect(token_type::end_of_input); - } - - // in case of an error, return discarded value - if (errored) - { - result = value_t::discarded; - return; - } // set top-level value to null if it was discarded by the callback // function @@ -3881,280 +3953,6 @@ class parser } private: - /*! - @brief the actual parser - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - */ - void parse_internal(bool keep, BasicJsonType& result) - { - // never parse after a parse error was detected - assert(not errored); - // this function is only called when a callback is given - assert(callback); - - // start with a discarded value - if (not result.is_discarded()) - { - result.m_value.destroy(result.m_type); - result.m_type = value_t::discarded; - } - - switch (last_token) - { - case token_type::begin_object: - { - if (keep) - { - keep = callback(depth++, parse_event_t::object_start, result); - - if (keep) - { - // explicitly set result to object to cope with {} - result.m_type = value_t::object; - result.m_value = value_t::object; - } - } - - // read next token - get_token(); - - // closing } -> we are done - if (last_token == token_type::end_object) - { - if (keep and not callback(--depth, parse_event_t::object_end, result)) - { - 
result.m_value.destroy(result.m_type); - result.m_type = value_t::discarded; - } - break; - } - - // parse values - string_t key; - BasicJsonType value; - while (true) - { - // store key - if (not expect(token_type::value_string)) - { - return; - } - key = m_lexer.get_string(); - - bool keep_tag = false; - if (keep) - { - BasicJsonType k(key); - keep_tag = callback(depth, parse_event_t::key, k); - } - - // parse separator (:) - get_token(); - if (not expect(token_type::name_separator)) - { - return; - } - - // parse and add value - get_token(); - value.m_value.destroy(value.m_type); - value.m_type = value_t::discarded; - parse_internal(keep, value); - - if (JSON_UNLIKELY(errored)) - { - return; - } - - if (keep and keep_tag and not value.is_discarded()) - { - result.m_value.object->emplace(std::move(key), std::move(value)); - } - - // comma -> next value - get_token(); - if (last_token == token_type::value_separator) - { - get_token(); - continue; - } - - // closing } - if (not expect(token_type::end_object)) - { - return; - } - break; - } - - if (keep and not callback(--depth, parse_event_t::object_end, result)) - { - result.m_value.destroy(result.m_type); - result.m_type = value_t::discarded; - } - break; - } - - case token_type::begin_array: - { - if (keep) - { - keep = callback(depth++, parse_event_t::array_start, result); - - if (keep) - { - // explicitly set result to array to cope with [] - result.m_type = value_t::array; - result.m_value = value_t::array; - } - } - - // read next token - get_token(); - - // closing ] -> we are done - if (last_token == token_type::end_array) - { - if (not callback(--depth, parse_event_t::array_end, result)) - { - result.m_value.destroy(result.m_type); - result.m_type = value_t::discarded; - } - break; - } - - // parse values - BasicJsonType value; - while (true) - { - // parse value - value.m_value.destroy(value.m_type); - value.m_type = value_t::discarded; - parse_internal(keep, value); - - if (JSON_UNLIKELY(errored)) - { - 
return; - } - - if (keep and not value.is_discarded()) - { - result.m_value.array->push_back(std::move(value)); - } - - // comma -> next value - get_token(); - if (last_token == token_type::value_separator) - { - get_token(); - continue; - } - - // closing ] - if (not expect(token_type::end_array)) - { - return; - } - break; - } - - if (keep and not callback(--depth, parse_event_t::array_end, result)) - { - result.m_value.destroy(result.m_type); - result.m_type = value_t::discarded; - } - break; - } - - case token_type::literal_null: - { - result.m_type = value_t::null; - break; - } - - case token_type::value_string: - { - result.m_type = value_t::string; - result.m_value = m_lexer.get_string(); - break; - } - - case token_type::literal_true: - { - result.m_type = value_t::boolean; - result.m_value = true; - break; - } - - case token_type::literal_false: - { - result.m_type = value_t::boolean; - result.m_value = false; - break; - } - - case token_type::value_unsigned: - { - result.m_type = value_t::number_unsigned; - result.m_value = m_lexer.get_number_unsigned(); - break; - } - - case token_type::value_integer: - { - result.m_type = value_t::number_integer; - result.m_value = m_lexer.get_number_integer(); - break; - } - - case token_type::value_float: - { - result.m_type = value_t::number_float; - result.m_value = m_lexer.get_number_float(); - - // throw in case of infinity or NAN - if (JSON_UNLIKELY(not std::isfinite(result.m_value.number_float))) - { - if (allow_exceptions) - { - JSON_THROW(out_of_range::create(406, "number overflow parsing '" + - m_lexer.get_token_string() + "'")); - } - expect(token_type::uninitialized); - } - break; - } - - case token_type::parse_error: - { - // using "uninitialized" to avoid "expected" message - if (not expect(token_type::uninitialized)) - { - return; - } - break; // LCOV_EXCL_LINE - } - - default: - { - // the last token was unexpected; we expected a value - if (not expect(token_type::literal_or_value)) - { - return; - } - 
break; // LCOV_EXCL_LINE - } - } - - if (keep and not callback(depth, parse_event_t::value, result)) - { - result.m_value.destroy(result.m_type); - result.m_type = value_t::discarded; - } - } - bool sax_parse_internal(json_sax_t* sax) { // two values for the structured values @@ -4464,27 +4262,6 @@ class parser return (last_token = m_lexer.scan()); } - /*! - @throw parse_error.101 if expected token did not occur - */ - bool expect(token_type t) - { - if (JSON_UNLIKELY(t != last_token)) - { - errored = true; - if (allow_exceptions) - { - JSON_THROW(parse_error::create(101, m_lexer.get_position(), exception_message(t))); - } - else - { - return false; - } - } - - return true; - } - std::string exception_message(const token_type expected) { std::string error_msg = "syntax error - "; @@ -4507,16 +4284,12 @@ class parser } private: - /// current level of recursion - int depth = 0; /// callback function const parser_callback_t callback = nullptr; /// the type of the last read token token_type last_token = token_type::uninitialized; /// the lexer lexer_t m_lexer; - /// whether a syntax error occurred - bool errored = false; /// whether to throw exceptions in case of errors const bool allow_exceptions = true; }; diff --git a/test/src/unit-ubjson.cpp b/test/src/unit-ubjson.cpp index 380e0f3f..791668ff 100644 --- a/test/src/unit-ubjson.cpp +++ b/test/src/unit-ubjson.cpp @@ -1309,6 +1309,35 @@ TEST_CASE("UBJSON") CHECK_THROWS_AS(json::to_ubjson(j), json::out_of_range&); CHECK_THROWS_WITH(json::to_ubjson(j), "[json.exception.out_of_range.407] number overflow serializing 9223372036854775808"); } + + SECTION("excessive size") + { + SECTION("array") + { + std::vector v_ubjson = {'[', '$', 'Z', '#', 'L', 0x78, 0x28, 0x00, 0x68, 0x28, 0x69, 0x69, 0x17}; + CHECK_THROWS_AS(json::from_ubjson(v_ubjson), json::out_of_range&); + + json j; + nlohmann::detail::json_sax_dom_callback_parser scp(j, [](int, json::parse_event_t, const json&) + { + return true; + }); + 
CHECK_THROWS_AS(json::sax_parse(v_ubjson, &scp, json::input_format_t::ubjson), json::out_of_range&); + } + + SECTION("object") + { + std::vector v_ubjson = {'{', '$', 'Z', '#', 'L', 0x78, 0x28, 0x00, 0x68, 0x28, 0x69, 0x69, 0x17}; + CHECK_THROWS_AS(json::from_ubjson(v_ubjson), json::out_of_range&); + + json j; + nlohmann::detail::json_sax_dom_callback_parser scp(j, [](int, json::parse_event_t, const json&) + { + return true; + }); + CHECK_THROWS_AS(json::sax_parse(v_ubjson, &scp, json::input_format_t::ubjson), json::out_of_range&); + } + } } SECTION("SAX aborts") From 850671b9f1adab7bd1ec1779ce9ca29bd66b904d Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Thu, 29 Mar 2018 18:45:43 +0200 Subject: [PATCH 35/43] :hammer: using a vector for the parser hierarchy --- include/nlohmann/detail/input/parser.hpp | 173 +++++++++++------------ single_include/nlohmann/json.hpp | 173 +++++++++++------------ 2 files changed, 166 insertions(+), 180 deletions(-) diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index de16ef05..67c94464 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -162,10 +162,9 @@ class parser private: bool sax_parse_internal(json_sax_t* sax) { - // two values for the structured values - enum class parse_state_t { array_value, object_value }; // stack to remember the hieararchy of structured values we are parsing - std::vector states; + // true = array; false = object + std::vector states; // value to avoid a goto (see comment where set to true) bool skip_to_state_evaluation = false; @@ -221,7 +220,7 @@ class parser } // remember we are now inside an object - states.push_back(parse_state_t::object_value); + states.push_back(false); // parse values get_token(); @@ -249,7 +248,7 @@ class parser } // remember we are now inside an array - states.push_back(parse_state_t::array_value); + states.push_back(true); // parse values (no need to call get_token) continue; 
@@ -359,104 +358,98 @@ class parser else { get_token(); - switch (states.back()) + if (states.back()) // array { - case parse_state_t::array_value: + // comma -> next value + if (last_token == token_type::value_separator) { - // comma -> next value - if (last_token == token_type::value_separator) - { - // parse a new value - get_token(); - continue; - } - - // closing ] - if (JSON_LIKELY(last_token == token_type::end_array)) - { - if (JSON_UNLIKELY(not sax->end_array())) - { - return false; - } - - // We are done with this array. Before we can parse - // a new value, we need to evaluate the new state - // first. By setting skip_to_state_evaluation to - // false, we are effectively jumping to the - // beginning of this switch. - assert(not states.empty()); - states.pop_back(); - skip_to_state_evaluation = true; - continue; - } - else - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array))); - } + // parse a new value + get_token(); + continue; } - case parse_state_t::object_value: + // closing ] + if (JSON_LIKELY(last_token == token_type::end_array)) { - // comma -> next value - if (last_token == token_type::value_separator) + if (JSON_UNLIKELY(not sax->end_array())) { - get_token(); - - // parse key - if (JSON_UNLIKELY(last_token != token_type::value_string)) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string))); - } - else - { - if (JSON_UNLIKELY(not sax->key(m_lexer.get_string()))) - { - return false; - } - } - - // parse separator (:) - get_token(); - if (JSON_UNLIKELY(last_token != token_type::name_separator)) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator))); - } - - // parse values - 
get_token(); - continue; + return false; } - // closing } - if (JSON_LIKELY(last_token == token_type::end_object)) - { - if (JSON_UNLIKELY(not sax->end_object())) - { - return false; - } + // We are done with this array. Before we can parse a + // new value, we need to evaluate the new state first. + // By setting skip_to_state_evaluation to false, we + // are effectively jumping to the beginning of this if. + assert(not states.empty()); + states.pop_back(); + skip_to_state_evaluation = true; + continue; + } + else + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array))); + } + } + else // object + { + // comma -> next value + if (last_token == token_type::value_separator) + { + get_token(); - // We are done with this object. Before we can - // parse a new value, we need to evaluate the new - // state first. By setting skip_to_state_evaluation - // to false, we are effectively jumping to the - // beginning of this switch. 
- assert(not states.empty()); - states.pop_back(); - skip_to_state_evaluation = true; - continue; - } - else + // parse key + if (JSON_UNLIKELY(last_token != token_type::value_string)) { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object))); + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string))); } + else + { + if (JSON_UNLIKELY(not sax->key(m_lexer.get_string()))) + { + return false; + } + } + + // parse separator (:) + get_token(); + if (JSON_UNLIKELY(last_token != token_type::name_separator)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator))); + } + + // parse values + get_token(); + continue; + } + + // closing } + if (JSON_LIKELY(last_token == token_type::end_object)) + { + if (JSON_UNLIKELY(not sax->end_object())) + { + return false; + } + + // We are done with this object. Before we can parse a + // new value, we need to evaluate the new state first. + // By setting skip_to_state_evaluation to false, we + // are effectively jumping to the beginning of this if. 
+ assert(not states.empty()); + states.pop_back(); + skip_to_state_evaluation = true; + continue; + } + else + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object))); } } } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 6fb619d6..9df10e1a 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3955,10 +3955,9 @@ class parser private: bool sax_parse_internal(json_sax_t* sax) { - // two values for the structured values - enum class parse_state_t { array_value, object_value }; // stack to remember the hieararchy of structured values we are parsing - std::vector states; + // true = array; false = object + std::vector states; // value to avoid a goto (see comment where set to true) bool skip_to_state_evaluation = false; @@ -4014,7 +4013,7 @@ class parser } // remember we are now inside an object - states.push_back(parse_state_t::object_value); + states.push_back(false); // parse values get_token(); @@ -4042,7 +4041,7 @@ class parser } // remember we are now inside an array - states.push_back(parse_state_t::array_value); + states.push_back(true); // parse values (no need to call get_token) continue; @@ -4152,104 +4151,98 @@ class parser else { get_token(); - switch (states.back()) + if (states.back()) // array { - case parse_state_t::array_value: + // comma -> next value + if (last_token == token_type::value_separator) { - // comma -> next value - if (last_token == token_type::value_separator) - { - // parse a new value - get_token(); - continue; - } - - // closing ] - if (JSON_LIKELY(last_token == token_type::end_array)) - { - if (JSON_UNLIKELY(not sax->end_array())) - { - return false; - } - - // We are done with this array. Before we can parse - // a new value, we need to evaluate the new state - // first. 
By setting skip_to_state_evaluation to - // false, we are effectively jumping to the - // beginning of this switch. - assert(not states.empty()); - states.pop_back(); - skip_to_state_evaluation = true; - continue; - } - else - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array))); - } + // parse a new value + get_token(); + continue; } - case parse_state_t::object_value: + // closing ] + if (JSON_LIKELY(last_token == token_type::end_array)) { - // comma -> next value - if (last_token == token_type::value_separator) + if (JSON_UNLIKELY(not sax->end_array())) { - get_token(); - - // parse key - if (JSON_UNLIKELY(last_token != token_type::value_string)) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string))); - } - else - { - if (JSON_UNLIKELY(not sax->key(m_lexer.get_string()))) - { - return false; - } - } - - // parse separator (:) - get_token(); - if (JSON_UNLIKELY(last_token != token_type::name_separator)) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator))); - } - - // parse values - get_token(); - continue; + return false; } - // closing } - if (JSON_LIKELY(last_token == token_type::end_object)) - { - if (JSON_UNLIKELY(not sax->end_object())) - { - return false; - } + // We are done with this array. Before we can parse a + // new value, we need to evaluate the new state first. + // By setting skip_to_state_evaluation to false, we + // are effectively jumping to the beginning of this if. 
+ assert(not states.empty()); + states.pop_back(); + skip_to_state_evaluation = true; + continue; + } + else + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array))); + } + } + else // object + { + // comma -> next value + if (last_token == token_type::value_separator) + { + get_token(); - // We are done with this object. Before we can - // parse a new value, we need to evaluate the new - // state first. By setting skip_to_state_evaluation - // to false, we are effectively jumping to the - // beginning of this switch. - assert(not states.empty()); - states.pop_back(); - skip_to_state_evaluation = true; - continue; - } - else + // parse key + if (JSON_UNLIKELY(last_token != token_type::value_string)) { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object))); + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string))); } + else + { + if (JSON_UNLIKELY(not sax->key(m_lexer.get_string()))) + { + return false; + } + } + + // parse separator (:) + get_token(); + if (JSON_UNLIKELY(last_token != token_type::name_separator)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator))); + } + + // parse values + get_token(); + continue; + } + + // closing } + if (JSON_LIKELY(last_token == token_type::end_object)) + { + if (JSON_UNLIKELY(not sax->end_object())) + { + return false; + } + + // We are done with this object. Before we can parse a + // new value, we need to evaluate the new state first. + // By setting skip_to_state_evaluation to false, we + // are effectively jumping to the beginning of this if. 
+ assert(not states.empty()); + states.pop_back(); + skip_to_state_evaluation = true; + continue; + } + else + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object))); } } } From ba6edd5634d0ba360ea79663dab70a6177687a92 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Fri, 30 Mar 2018 00:38:18 +0200 Subject: [PATCH 36/43] :hammer: cleanup --- include/nlohmann/detail/input/parser.hpp | 25 +++++++----------------- single_include/nlohmann/json.hpp | 25 +++++++----------------- 2 files changed, 14 insertions(+), 36 deletions(-) diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index 67c94464..c3cfcd53 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -182,11 +182,8 @@ class parser return false; } - // read next token - get_token(); - // closing } -> we are done - if (last_token == token_type::end_object) + if (get_token() == token_type::end_object) { if (JSON_UNLIKELY(not sax->end_object())) { @@ -211,8 +208,7 @@ class parser } // parse separator (:) - get_token(); - if (JSON_UNLIKELY(last_token != token_type::name_separator)) + if (JSON_UNLIKELY(get_token() != token_type::name_separator)) { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), @@ -234,11 +230,8 @@ class parser return false; } - // read next token - get_token(); - // closing ] -> we are done - if (last_token == token_type::end_array) + if (get_token() == token_type::end_array) { if (JSON_UNLIKELY(not sax->end_array())) { @@ -357,11 +350,10 @@ class parser } else { - get_token(); if (states.back()) // array { // comma -> next value - if (last_token == token_type::value_separator) + if (get_token() == token_type::value_separator) { // parse a new value get_token(); @@ -395,12 +387,10 @@ class parser else // object { // comma -> next value - if (last_token == 
token_type::value_separator) + if (get_token() == token_type::value_separator) { - get_token(); - // parse key - if (JSON_UNLIKELY(last_token != token_type::value_string)) + if (JSON_UNLIKELY(get_token() != token_type::value_string)) { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), @@ -415,8 +405,7 @@ class parser } // parse separator (:) - get_token(); - if (JSON_UNLIKELY(last_token != token_type::name_separator)) + if (JSON_UNLIKELY(get_token() != token_type::name_separator)) { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 9df10e1a..415ed114 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3975,11 +3975,8 @@ class parser return false; } - // read next token - get_token(); - // closing } -> we are done - if (last_token == token_type::end_object) + if (get_token() == token_type::end_object) { if (JSON_UNLIKELY(not sax->end_object())) { @@ -4004,8 +4001,7 @@ class parser } // parse separator (:) - get_token(); - if (JSON_UNLIKELY(last_token != token_type::name_separator)) + if (JSON_UNLIKELY(get_token() != token_type::name_separator)) { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), @@ -4027,11 +4023,8 @@ class parser return false; } - // read next token - get_token(); - // closing ] -> we are done - if (last_token == token_type::end_array) + if (get_token() == token_type::end_array) { if (JSON_UNLIKELY(not sax->end_array())) { @@ -4150,11 +4143,10 @@ class parser } else { - get_token(); if (states.back()) // array { // comma -> next value - if (last_token == token_type::value_separator) + if (get_token() == token_type::value_separator) { // parse a new value get_token(); @@ -4188,12 +4180,10 @@ class parser else // object { // comma -> next value - if (last_token == token_type::value_separator) + if (get_token() == token_type::value_separator) { - 
get_token(); - // parse key - if (JSON_UNLIKELY(last_token != token_type::value_string)) + if (JSON_UNLIKELY(get_token() != token_type::value_string)) { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), @@ -4208,8 +4198,7 @@ class parser } // parse separator (:) - get_token(); - if (JSON_UNLIKELY(last_token != token_type::name_separator)) + if (JSON_UNLIKELY(get_token() != token_type::name_separator)) { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), From aa89c5e048bc31c2b2c60ce50f498e0651c7f604 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 2 Apr 2018 21:10:48 +0200 Subject: [PATCH 37/43] :hammer: removing unget_character() function from input adapters #834 --- .../nlohmann/detail/input/input_adapters.hpp | 68 ++-------- include/nlohmann/detail/input/lexer.hpp | 60 +++++++- single_include/nlohmann/json.hpp | 128 +++++++++--------- test/src/unit-deserialization.cpp | 54 +++++--- 4 files changed, 168 insertions(+), 142 deletions(-) diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index 92987e97..82a59136 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -31,19 +31,17 @@ enum class input_format_t { json, cbor, msgpack, ubjson }; @brief abstract input adapter interface Produces a stream of std::char_traits::int_type characters from a -std::istream, a buffer, or some other input type. Accepts the return of exactly -one non-EOF character for future input. The int_type characters returned -consist of all valid char values as positive values (typically unsigned char), -plus an EOF value outside that range, specified by the value of the function -std::char_traits::eof(). This value is typically -1, but could be any -arbitrary value which is not a valid char value. +std::istream, a buffer, or some other input type. 
Accepts the return of +exactly one non-EOF character for future input. The int_type characters +returned consist of all valid char values as positive values (typically +unsigned char), plus an EOF value outside that range, specified by the value +of the function std::char_traits::eof(). This value is typically -1, but +could be any arbitrary value which is not a valid char value. */ struct input_adapter_protocol { /// get a character [0,255] or std::char_traits::eof(). virtual std::char_traits::int_type get_character() = 0; - /// restore the last non-eof() character to input - virtual void unget_character() = 0; virtual ~input_adapter_protocol() = default; }; @@ -71,34 +69,7 @@ class input_stream_adapter : public input_adapter_protocol explicit input_stream_adapter(std::istream& i) : is(i), sb(*i.rdbuf()) - { - // skip byte order mark - std::char_traits::int_type c; - if ((c = get_character()) == 0xEF) - { - if ((c = get_character()) == 0xBB) - { - if ((c = get_character()) == 0xBF) - { - return; // Ignore BOM - } - else if (c != std::char_traits::eof()) - { - is.unget(); - } - is.putback('\xBB'); - } - else if (c != std::char_traits::eof()) - { - is.unget(); - } - is.putback('\xEF'); - } - else if (c != std::char_traits::eof()) - { - is.unget(); // no byte order mark; process as usual - } - } + {} // delete because of pointer members input_stream_adapter(const input_stream_adapter&) = delete; @@ -112,11 +83,6 @@ class input_stream_adapter : public input_adapter_protocol return sb.sbumpc(); } - void unget_character() override - { - sb.sungetc(); // is.unget() avoided for performance - } - private: /// the associated input stream std::istream& is; @@ -128,14 +94,8 @@ class input_buffer_adapter : public input_adapter_protocol { public: input_buffer_adapter(const char* b, const std::size_t l) - : cursor(b), limit(b + l), start(b) - { - // skip byte order mark - if (l >= 3 and b[0] == '\xEF' and b[1] == '\xBB' and b[2] == '\xBF') - { - cursor += 3; - } - } + : 
cursor(b), limit(b + l) + {} // delete because of pointer members input_buffer_adapter(const input_buffer_adapter&) = delete; @@ -151,21 +111,11 @@ class input_buffer_adapter : public input_adapter_protocol return std::char_traits::eof(); } - void unget_character() noexcept override - { - if (JSON_LIKELY(cursor > start)) - { - --cursor; - } - } - private: /// pointer to the current character const char* cursor; /// pointer past the last character const char* limit; - /// pointer to the first character - const char* start; }; class input_adapter diff --git a/include/nlohmann/detail/input/lexer.hpp b/include/nlohmann/detail/input/lexer.hpp index 1ed35617..7b31068f 100644 --- a/include/nlohmann/detail/input/lexer.hpp +++ b/include/nlohmann/detail/input/lexer.hpp @@ -1081,7 +1081,16 @@ scan_number_done: std::char_traits::int_type get() { ++chars_read; - current = ia->get_character(); + if (next_unget) + { + // just reset the next_unget variable and work with current + next_unget = false; + } + else + { + current = ia->get_character(); + } + if (JSON_LIKELY(current != std::char_traits::eof())) { token_string.push_back(std::char_traits::to_char_type(current)); @@ -1089,13 +1098,20 @@ scan_number_done: return current; } - /// unget current character (return it again on next get) + /*! + @brief unget current character (read it again on next get) + + We implement unget by setting variable next_unget to true. The input is not + changed - we just simulate ungetting by modifying chars_read and + token_string. The next call to get() will behave as if the unget character + is read again. + */ void unget() { + next_unget = true; --chars_read; if (JSON_LIKELY(current != std::char_traits::eof())) { - ia->unget_character(); assert(token_string.size() != 0); token_string.pop_back(); } @@ -1183,8 +1199,43 @@ scan_number_done: // actual scanner ///////////////////// + /*! 
+ @brief skip the UTF-8 byte order mark + @return true iff there is no BOM or the correct BOM has been skipped + */ + bool skip_bom() + { + if (get() == 0xEF) + { + if (get() == 0xBB and get() == 0xBF) + { + // we completely parsed the BOM + return true; + } + else + { + // after reading 0xEF, an unexpected character followed + return false; + } + } + else + { + // the first character is not the beginning of the BOM; unget it to + // process is later + unget(); + return true; + } + } + token_type scan() { + // initially, skip the BOM + if (chars_read == 0 and not skip_bom()) + { + error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given"; + return token_type::parse_error; + } + // read next character and ignore whitespace do { @@ -1254,6 +1305,9 @@ scan_number_done: /// the current character std::char_traits::int_type current = std::char_traits::eof(); + /// whether the next get() call should just return current + bool next_unget = false; + /// the number of characters read std::size_t chars_read = 0; diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 415ed114..30085744 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -1604,19 +1604,17 @@ enum class input_format_t { json, cbor, msgpack, ubjson }; @brief abstract input adapter interface Produces a stream of std::char_traits::int_type characters from a -std::istream, a buffer, or some other input type. Accepts the return of exactly -one non-EOF character for future input. The int_type characters returned -consist of all valid char values as positive values (typically unsigned char), -plus an EOF value outside that range, specified by the value of the function -std::char_traits::eof(). This value is typically -1, but could be any -arbitrary value which is not a valid char value. +std::istream, a buffer, or some other input type. Accepts the return of +exactly one non-EOF character for future input. 
The int_type characters +returned consist of all valid char values as positive values (typically +unsigned char), plus an EOF value outside that range, specified by the value +of the function std::char_traits::eof(). This value is typically -1, but +could be any arbitrary value which is not a valid char value. */ struct input_adapter_protocol { /// get a character [0,255] or std::char_traits::eof(). virtual std::char_traits::int_type get_character() = 0; - /// restore the last non-eof() character to input - virtual void unget_character() = 0; virtual ~input_adapter_protocol() = default; }; @@ -1644,34 +1642,7 @@ class input_stream_adapter : public input_adapter_protocol explicit input_stream_adapter(std::istream& i) : is(i), sb(*i.rdbuf()) - { - // skip byte order mark - std::char_traits::int_type c; - if ((c = get_character()) == 0xEF) - { - if ((c = get_character()) == 0xBB) - { - if ((c = get_character()) == 0xBF) - { - return; // Ignore BOM - } - else if (c != std::char_traits::eof()) - { - is.unget(); - } - is.putback('\xBB'); - } - else if (c != std::char_traits::eof()) - { - is.unget(); - } - is.putback('\xEF'); - } - else if (c != std::char_traits::eof()) - { - is.unget(); // no byte order mark; process as usual - } - } + {} // delete because of pointer members input_stream_adapter(const input_stream_adapter&) = delete; @@ -1685,11 +1656,6 @@ class input_stream_adapter : public input_adapter_protocol return sb.sbumpc(); } - void unget_character() override - { - sb.sungetc(); // is.unget() avoided for performance - } - private: /// the associated input stream std::istream& is; @@ -1701,14 +1667,8 @@ class input_buffer_adapter : public input_adapter_protocol { public: input_buffer_adapter(const char* b, const std::size_t l) - : cursor(b), limit(b + l), start(b) - { - // skip byte order mark - if (l >= 3 and b[0] == '\xEF' and b[1] == '\xBB' and b[2] == '\xBF') - { - cursor += 3; - } - } + : cursor(b), limit(b + l) + {} // delete because of pointer members 
input_buffer_adapter(const input_buffer_adapter&) = delete; @@ -1724,21 +1684,11 @@ class input_buffer_adapter : public input_adapter_protocol return std::char_traits::eof(); } - void unget_character() noexcept override - { - if (JSON_LIKELY(cursor > start)) - { - --cursor; - } - } - private: /// pointer to the current character const char* cursor; /// pointer past the last character const char* limit; - /// pointer to the first character - const char* start; }; class input_adapter @@ -2923,7 +2873,16 @@ scan_number_done: std::char_traits::int_type get() { ++chars_read; - current = ia->get_character(); + if (next_unget) + { + // just reset the next_unget variable and work with current + next_unget = false; + } + else + { + current = ia->get_character(); + } + if (JSON_LIKELY(current != std::char_traits::eof())) { token_string.push_back(std::char_traits::to_char_type(current)); @@ -2931,13 +2890,20 @@ scan_number_done: return current; } - /// unget current character (return it again on next get) + /*! + @brief unget current character (read it again on next get) + + We implement unget by setting variable next_unget to true. The input is not + changed - we just simulate ungetting by modifying chars_read and + token_string. The next call to get() will behave as if the unget character + is read again. + */ void unget() { + next_unget = true; --chars_read; if (JSON_LIKELY(current != std::char_traits::eof())) { - ia->unget_character(); assert(token_string.size() != 0); token_string.pop_back(); } @@ -3025,8 +2991,43 @@ scan_number_done: // actual scanner ///////////////////// + /*! 
+ @brief skip the UTF-8 byte order mark + @return true iff there is no BOM or the correct BOM has been skipped + */ + bool skip_bom() + { + if (get() == 0xEF) + { + if (get() == 0xBB and get() == 0xBF) + { + // we completely parsed the BOM + return true; + } + else + { + // after reading 0xEF, an unexpected character followed + return false; + } + } + else + { + // the first character is not the beginning of the BOM; unget it to + // process is later + unget(); + return true; + } + } + token_type scan() { + // initially, skip the BOM + if (chars_read == 0 and not skip_bom()) + { + error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given"; + return token_type::parse_error; + } + // read next character and ignore whitespace do { @@ -3096,6 +3097,9 @@ scan_number_done: /// the current character std::char_traits::int_type current = std::char_traits::eof(); + /// whether the next get() call should just return current + bool next_unget = false; + /// the number of characters read std::size_t chars_read = 0; diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index 5c9fde5a..d69c5983 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -798,18 +798,18 @@ TEST_CASE("deserialization") { CHECK_THROWS_AS(json::parse(bom), json::parse_error&); CHECK_THROWS_WITH(json::parse(bom), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + "[json.exception.parse_error.101] parse error at 4: syntax error - unexpected end of input; expected '[', '{', or a literal"); CHECK_THROWS_AS(json::parse(std::istringstream(bom)), json::parse_error&); CHECK_THROWS_WITH(json::parse(std::istringstream(bom)), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + "[json.exception.parse_error.101] parse error at 4: syntax error - unexpected end of input; expected '[', '{', or a 
literal"); SaxEventLogger l; CHECK(not json::sax_parse(bom, &l)); CHECK(l.events.size() == 1); CHECK(l.events == std::vector( { - "parse_error(1)" + "parse_error(4)" })); } @@ -836,12 +836,12 @@ TEST_CASE("deserialization") SECTION("2 byte of BOM") { CHECK_THROWS_AS(json::parse(bom.substr(0, 2)), json::parse_error&); - CHECK_THROWS_WITH(json::parse(bom), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + CHECK_THROWS_WITH(json::parse(bom.substr(0, 2)), + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid BOM; must be 0xEF 0xBB 0xBF if given; last read: '\xEF\xBB'"); CHECK_THROWS_AS(json::parse(std::istringstream(bom.substr(0, 2))), json::parse_error&); - CHECK_THROWS_WITH(json::parse(std::istringstream(bom)), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + CHECK_THROWS_WITH(json::parse(std::istringstream(bom.substr(0, 2))), + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid BOM; must be 0xEF 0xBB 0xBF if given; last read: '\xEF\xBB'"); SaxEventLogger l1, l2; CHECK(not json::sax_parse(std::istringstream(bom.substr(0, 2)), &l1)); @@ -849,24 +849,24 @@ TEST_CASE("deserialization") CHECK(l1.events.size() == 1); CHECK(l1.events == std::vector( { - "parse_error(1)" + "parse_error(3)" })); CHECK(l2.events.size() == 1); CHECK(l2.events == std::vector( { - "parse_error(1)" + "parse_error(3)" })); } SECTION("1 byte of BOM") { CHECK_THROWS_AS(json::parse(bom.substr(0, 1)), json::parse_error&); - CHECK_THROWS_WITH(json::parse(bom), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + CHECK_THROWS_WITH(json::parse(bom.substr(0, 1)), + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid BOM; must be 0xEF 0xBB 0xBF if given; last read: '\xEF'"); 
CHECK_THROWS_AS(json::parse(std::istringstream(bom.substr(0, 1))), json::parse_error&); - CHECK_THROWS_WITH(json::parse(std::istringstream(bom)), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + CHECK_THROWS_WITH(json::parse(std::istringstream(bom.substr(0, 1))), + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid BOM; must be 0xEF 0xBB 0xBF if given; last read: '\xEF'"); SaxEventLogger l1, l2; CHECK(not json::sax_parse(std::istringstream(bom.substr(0, 1)), &l1)); @@ -874,12 +874,12 @@ TEST_CASE("deserialization") CHECK(l1.events.size() == 1); CHECK(l1.events == std::vector( { - "parse_error(1)" + "parse_error(2)" })); CHECK(l2.events.size() == 1); CHECK(l2.events == std::vector( { - "parse_error(1)" + "parse_error(2)" })); } @@ -926,10 +926,28 @@ TEST_CASE("deserialization") SaxEventLogger l; CHECK(not json::sax_parse(s + "null", &l)); CHECK(l.events.size() == 1); - CHECK(l.events == std::vector( + + if (i0 != 0) { - "parse_error(1)" - })); + CHECK(l.events == std::vector( + { + "parse_error(1)" + })); + } + else if (i1 != 0) + { + CHECK(l.events == std::vector( + { + "parse_error(2)" + })); + } + else + { + CHECK(l.events == std::vector( + { + "parse_error(3)" + })); + } } } } From 1e08654f995ab9248ea3567dcb3854d2928198a2 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 2 Apr 2018 22:25:17 +0200 Subject: [PATCH 38/43] :hammer: cleanup --- .travis.yml | 1 + include/nlohmann/detail/input/input_adapters.hpp | 7 ++----- include/nlohmann/detail/input/json_sax.hpp | 1 - single_include/nlohmann/json.hpp | 6 ++---- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8ce38cc8..68a16db5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -277,6 +277,7 @@ script: if [[ (-x $(which brew)) ]]; then brew update brew install cmake ninja + brew upgrade cmake cmake --version fi diff --git 
a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index 82a59136..4c942f33 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -1,11 +1,8 @@ #pragma once -#include // min -#include // array #include // assert #include // size_t #include // strlen -#include // streamsize, streamoff, streampos #include // istream #include // begin, end, iterator_traits, random_access_iterator_tag, distance, next #include // shared_ptr, make_shared, addressof @@ -31,7 +28,7 @@ enum class input_format_t { json, cbor, msgpack, ubjson }; @brief abstract input adapter interface Produces a stream of std::char_traits::int_type characters from a -std::istream, a buffer, or some other input type. Accepts the return of +std::istream, a buffer, or some other input type. Accepts the return of exactly one non-EOF character for future input. The int_type characters returned consist of all valid char values as positive values (typically unsigned char), plus an EOF value outside that range, specified by the value @@ -115,7 +112,7 @@ class input_buffer_adapter : public input_adapter_protocol /// pointer to the current character const char* cursor; /// pointer past the last character - const char* limit; + const char* const limit; }; class input_adapter diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index 0bc628cc..561bbe1c 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -660,4 +660,3 @@ class json_sax_acceptor : public json_sax } } - diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 30085744..637cb403 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -1572,12 +1572,10 @@ constexpr const auto& to_json = detail::static_const::value; // #include -#include // min #include // array #include // assert 
#include // size_t #include // strlen -#include // streamsize, streamoff, streampos #include // istream #include // begin, end, iterator_traits, random_access_iterator_tag, distance, next #include // shared_ptr, make_shared, addressof @@ -1686,9 +1684,9 @@ class input_buffer_adapter : public input_adapter_protocol private: /// pointer to the current character - const char* cursor; + const char* const cursor; /// pointer past the last character - const char* limit; + const char* const limit; }; class input_adapter From 08a7233d1b58232e34488d2e14168ce913abd577 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 2 Apr 2018 22:38:44 +0200 Subject: [PATCH 39/43] :ambulance: fixed commit 1e08654 --- single_include/nlohmann/json.hpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 637cb403..226ac96b 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -1572,7 +1572,6 @@ constexpr const auto& to_json = detail::static_const::value; // #include -#include // array #include // assert #include // size_t #include // strlen @@ -1602,7 +1601,7 @@ enum class input_format_t { json, cbor, msgpack, ubjson }; @brief abstract input adapter interface Produces a stream of std::char_traits::int_type characters from a -std::istream, a buffer, or some other input type. Accepts the return of +std::istream, a buffer, or some other input type. Accepts the return of exactly one non-EOF character for future input. 
The int_type characters returned consist of all valid char values as positive values (typically unsigned char), plus an EOF value outside that range, specified by the value @@ -1684,7 +1683,7 @@ class input_buffer_adapter : public input_adapter_protocol private: /// pointer to the current character - const char* const cursor; + const char* cursor; /// pointer past the last character const char* const limit; }; @@ -3803,7 +3802,6 @@ class json_sax_acceptor : public json_sax } - // #include // #include From ae213721b1d51570f8d922c8b3f7a7fa13ccdbdf Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 6 May 2018 14:05:29 +0200 Subject: [PATCH 40/43] :hammer: removed unget function for wstring parsers --- .../nlohmann/detail/input/input_adapters.hpp | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index 6a3ffa2b..d1c7fea1 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -123,13 +123,6 @@ class wide_string_input_adapter : public input_adapter_protocol std::char_traits::int_type get_character() noexcept override { - // unget_character() was called previously: return the last character - if (next_unget) - { - next_unget = false; - return last_char; - } - // check if buffer needs to be filled if (utf8_bytes_index == utf8_bytes_filled) { @@ -149,12 +142,7 @@ class wide_string_input_adapter : public input_adapter_protocol // use buffer assert(utf8_bytes_filled > 0); assert(utf8_bytes_index < utf8_bytes_filled); - return (last_char = utf8_bytes[utf8_bytes_index++]); - } - - void unget_character() noexcept override - { - next_unget = true; + return utf8_bytes[utf8_bytes_index++]; } private: @@ -278,11 +266,6 @@ class wide_string_input_adapter : public input_adapter_protocol std::size_t utf8_bytes_index = 0; /// number of valid bytes in the utf8_codes array std::size_t 
utf8_bytes_filled = 0; - - /// the last character (returned after unget_character() is called) - std::char_traits::int_type last_char = 0; - /// whether get_character() should return last_char - bool next_unget = false; }; class input_adapter From e94862a6495d9cfa8ea7f7c4b042fcd996705059 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 6 May 2018 19:00:49 +0200 Subject: [PATCH 41/43] :ambulance: fixed error in callback logic --- include/nlohmann/detail/input/json_sax.hpp | 23 ++++++++---- single_include/nlohmann/json.hpp | 42 +++++++++------------- 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index 561bbe1c..6f3aa99c 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -349,10 +349,10 @@ class json_sax_dom_callback_parser : public json_sax bool start_object(std::size_t len) override { // check callback for object start - const bool keep = callback(static_cast(ref_stack.size()) + 1, parse_event_t::object_start, discarded); + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::object_start, discarded); keep_stack.push_back(keep); - auto val = handle_value(BasicJsonType::value_t::object); + auto val = handle_value(BasicJsonType::value_t::object, true); ref_stack.push_back(val.second); // check object limit @@ -424,10 +424,10 @@ class json_sax_dom_callback_parser : public json_sax bool start_array(std::size_t len) override { - const bool keep = callback(static_cast(ref_stack.size()) + 1, parse_event_t::array_start, discarded); + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::array_start, discarded); keep_stack.push_back(keep); - auto val = handle_value(BasicJsonType::value_t::array); + auto val = handle_value(BasicJsonType::value_t::array, true); ref_stack.push_back(val.second); // check array limit @@ -507,15 +507,22 @@ class json_sax_dom_callback_parser : 
public json_sax private: /*! + @param[in] v value to add to the JSON value we build during parsing + @param[in] skip_callback whether we should skip calling the callback + function; this is required after start_array() and + start_object() SAX events, because otherwise we would call the + callback function with an empty array or object, respectively. + @invariant If the ref stack is empty, then the passed value will be the new root. @invariant If the ref stack contains a value, then it is an array or an object to which we can add elements + @return pair of boolean (whether value should be kept) and pointer (to the passed value in the ref_stack hierarchy; nullptr if not kept) */ template - std::pair handle_value(Value&& v) + std::pair handle_value(Value&& v, const bool skip_callback = false) { assert(not keep_stack.empty()); @@ -526,9 +533,11 @@ class json_sax_dom_callback_parser : public json_sax return {false, nullptr}; } - // create value and check callback + // create value auto value = BasicJsonType(std::forward(v)); - const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::value, value); + + // check callback + const bool keep = skip_callback or callback(static_cast(ref_stack.size()), parse_event_t::value, value); // do not handle this value if we just learnt it shall be discarded if (not keep) diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index fa7f585d..95077359 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -1697,13 +1697,6 @@ class wide_string_input_adapter : public input_adapter_protocol std::char_traits::int_type get_character() noexcept override { - // unget_character() was called previously: return the last character - if (next_unget) - { - next_unget = false; - return last_char; - } - // check if buffer needs to be filled if (utf8_bytes_index == utf8_bytes_filled) { @@ -1723,12 +1716,7 @@ class wide_string_input_adapter : public input_adapter_protocol // use 
buffer assert(utf8_bytes_filled > 0); assert(utf8_bytes_index < utf8_bytes_filled); - return (last_char = utf8_bytes[utf8_bytes_index++]); - } - - void unget_character() noexcept override - { - next_unget = true; + return utf8_bytes[utf8_bytes_index++]; } private: @@ -1852,11 +1840,6 @@ class wide_string_input_adapter : public input_adapter_protocol std::size_t utf8_bytes_index = 0; /// number of valid bytes in the utf8_codes array std::size_t utf8_bytes_filled = 0; - - /// the last character (returned after unget_character() is called) - std::char_traits::int_type last_char = 0; - /// whether get_character() should return last_char - bool next_unget = false; }; class input_adapter @@ -3670,10 +3653,10 @@ class json_sax_dom_callback_parser : public json_sax bool start_object(std::size_t len) override { // check callback for object start - const bool keep = callback(static_cast(ref_stack.size()) + 1, parse_event_t::object_start, discarded); + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::object_start, discarded); keep_stack.push_back(keep); - auto val = handle_value(BasicJsonType::value_t::object); + auto val = handle_value(BasicJsonType::value_t::object, true); ref_stack.push_back(val.second); // check object limit @@ -3745,10 +3728,10 @@ class json_sax_dom_callback_parser : public json_sax bool start_array(std::size_t len) override { - const bool keep = callback(static_cast(ref_stack.size()) + 1, parse_event_t::array_start, discarded); + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::array_start, discarded); keep_stack.push_back(keep); - auto val = handle_value(BasicJsonType::value_t::array); + auto val = handle_value(BasicJsonType::value_t::array, true); ref_stack.push_back(val.second); // check array limit @@ -3828,15 +3811,22 @@ class json_sax_dom_callback_parser : public json_sax private: /*! 
+ @param[in] v value to add to the JSON value we build during parsing + @param[in] skip_callback whether we should skip calling the callback + function; this is required after start_array() and + start_object() SAX events, because otherwise we would call the + callback function with an empty array or object, respectively. + @invariant If the ref stack is empty, then the passed value will be the new root. @invariant If the ref stack contains a value, then it is an array or an object to which we can add elements + @return pair of boolean (whether value should be kept) and pointer (to the passed value in the ref_stack hierarchy; nullptr if not kept) */ template - std::pair handle_value(Value&& v) + std::pair handle_value(Value&& v, const bool skip_callback = false) { assert(not keep_stack.empty()); @@ -3847,9 +3837,11 @@ class json_sax_dom_callback_parser : public json_sax return {false, nullptr}; } - // create value and check callback + // create value auto value = BasicJsonType(std::forward(v)); - const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::value, value); + + // check callback + const bool keep = skip_callback or callback(static_cast(ref_stack.size()), parse_event_t::value, value); // do not handle this value if we just learnt it shall be discarded if (not keep) From 1f84cc2c88c6522e5c953483d741f39bf384df08 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Thu, 17 May 2018 16:50:37 +0200 Subject: [PATCH 42/43] :white_check_mark: adjusted test cases --- test/src/unit-testsuites.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/test/src/unit-testsuites.cpp b/test/src/unit-testsuites.cpp index 7de00c55..24c4195d 100644 --- a/test/src/unit-testsuites.cpp +++ b/test/src/unit-testsuites.cpp @@ -1139,7 +1139,6 @@ TEST_CASE("nst's JSONTestSuite (2)") "test/data/nst_json_testsuite2/test_parsing/n_string_unescaped_tab.json", "test/data/nst_json_testsuite2/test_parsing/n_string_unicode_CapitalU.json", 
"test/data/nst_json_testsuite2/test_parsing/n_string_with_trailing_garbage.json", - //"test/data/nst_json_testsuite2/test_parsing/n_structure_100000_opening_arrays.json", "test/data/nst_json_testsuite2/test_parsing/n_structure_U+2060_word_joined.json", "test/data/nst_json_testsuite2/test_parsing/n_structure_UTF8_BOM_no_data.json", "test/data/nst_json_testsuite2/test_parsing/n_structure_angle_bracket_..json", @@ -1165,7 +1164,6 @@ TEST_CASE("nst's JSONTestSuite (2)") "test/data/nst_json_testsuite2/test_parsing/n_structure_object_with_trailing_garbage.json", "test/data/nst_json_testsuite2/test_parsing/n_structure_open_array_apostrophe.json", "test/data/nst_json_testsuite2/test_parsing/n_structure_open_array_comma.json", - //"test/data/nst_json_testsuite2/test_parsing/n_structure_open_array_object.json", "test/data/nst_json_testsuite2/test_parsing/n_structure_open_array_open_object.json", "test/data/nst_json_testsuite2/test_parsing/n_structure_open_array_open_string.json", "test/data/nst_json_testsuite2/test_parsing/n_structure_open_array_string.json", @@ -1199,6 +1197,21 @@ TEST_CASE("nst's JSONTestSuite (2)") } } + SECTION("n (previously overflowed)") + { + for (auto filename : + { + "test/data/nst_json_testsuite2/test_parsing/n_structure_100000_opening_arrays.json", + "test/data/nst_json_testsuite2/test_parsing/n_structure_open_array_object.json" + } + ) + { + CAPTURE(filename); + std::ifstream f(filename); + CHECK(not json::accept(f)); + } + } + SECTION("i -> y") { for (auto filename : From 3cdc4d784b04db0579523ec717cb57c1601c1014 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 27 May 2018 18:07:53 +0200 Subject: [PATCH 43/43] :memo: added documentation --- include/nlohmann/detail/input/json_sax.hpp | 18 ++++++++++++++++++ single_include/nlohmann/json.hpp | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index 6f3aa99c..5970afd5 100644 --- 
a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -122,6 +122,19 @@ struct json_sax namespace detail { +/*! +@brief SAX implementation to create a JSON value from SAX events + +This class implements the @ref json_sax interface and processes the SAX events +to create a JSON value which makes it basically a DOM parser. The structure or +hierarchy of the JSON value is managed by the stack `ref_stack` which contains +a pointer to the respective array or object for each recursion depth. + +After successful parsing, the value that is passed by reference to the +constructor contains the parsed value. + +@tparam BasicJsonType the JSON type +*/ template class json_sax_dom_parser : public json_sax { @@ -131,6 +144,11 @@ class json_sax_dom_parser : public json_sax using number_float_t = typename BasicJsonType::number_float_t; using string_t = typename BasicJsonType::string_t; + /*! + @param[in, out] r reference to a JSON value that is manipulated while + parsing + @param[in] allow_exceptions_ whether parse errors yield exceptions + */ json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) : root(r), allow_exceptions(allow_exceptions_) {} diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index ca3539e3..6559d23d 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3426,6 +3426,19 @@ struct json_sax namespace detail { +/*! +@brief SAX implementation to create a JSON value from SAX events + +This class implements the @ref json_sax interface and processes the SAX events +to create a JSON value which makes it basically a DOM parser. The structure or +hierarchy of the JSON value is managed by the stack `ref_stack` which contains +a pointer to the respective array or object for each recursion depth. + +After successful parsing, the value that is passed by reference to the +constructor contains the parsed value. 
+ +@tparam BasicJsonType the JSON type +*/ template class json_sax_dom_parser : public json_sax { @@ -3435,6 +3448,11 @@ class json_sax_dom_parser : public json_sax using number_float_t = typename BasicJsonType::number_float_t; using string_t = typename BasicJsonType::string_t; + /*! + @param[in, out] r reference to a JSON value that is manipulated while + parsing + @param[in] allow_exceptions_ whether parse errors yield exceptions + */ json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) : root(r), allow_exceptions(allow_exceptions_) {}