From b53c6e2f8193cb6469ff4319f6c516ca94de71d3 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 16 Jun 2020 12:28:59 +0200 Subject: [PATCH 1/6] :sparkles: ignore comments --- include/nlohmann/detail/input/lexer.hpp | 88 ++++++++++++++++++++++++- single_include/nlohmann/json.hpp | 88 ++++++++++++++++++++++++- test/src/unit-class_lexer.cpp | 20 ++++++ 3 files changed, 190 insertions(+), 6 deletions(-) diff --git a/include/nlohmann/detail/input/lexer.hpp b/include/nlohmann/detail/input/lexer.hpp index 0ff0c736..00af0ce2 100644 --- a/include/nlohmann/detail/input/lexer.hpp +++ b/include/nlohmann/detail/input/lexer.hpp @@ -112,8 +112,11 @@ class lexer : public lexer_base public: using token_type = typename lexer_base::token_type; - explicit lexer(InputAdapterType&& adapter) - : ia(std::move(adapter)), decimal_point_char(static_cast(get_decimal_point())) {} + explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false) + : ia(std::move(adapter)) + , ignore_comments(ignore_comments_) + , decimal_point_char(static_cast(get_decimal_point())) + {} // delete because of pointer members lexer(const lexer&) = delete; @@ -131,7 +134,7 @@ class lexer : public lexer_base JSON_HEDLEY_PURE static char get_decimal_point() noexcept { - const auto loc = localeconv(); + const auto* loc = localeconv(); assert(loc != nullptr); return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point); } @@ -826,6 +829,71 @@ class lexer : public lexer_base } } + /*! 
+ * @brief scan a comment + * @return whether comment could be scanned successfully + */ + bool scan_comment() + { + // remember character after '/' to distinguish comment types + const auto comment_char = get(); + + // expect // or /* to start a comment + if (comment_char != '/' and comment_char != '*') + { + return false; + } + + while (true) + { + switch (get()) + { + // EOF inside a /* comment is an error, in // it is OK + case std::char_traits::eof(): + case '\0': + { + return comment_char == '/'; + } + + // a newline ends the // comment + case '\n': + case '\r': + { + if (comment_char == '/') + { + return true; + } + break; + } + + // */ ends the /* comment + case '*': + { + if (comment_char == '*') + { + switch (get()) + { + case '/': + { + return true; + } + + default: + { + unget(); + break; + } + } + } + break; + } + + default: + break; + } + } + } + JSON_HEDLEY_NON_NULL(2) static void strtof(float& f, const char* str, char** endptr) noexcept { @@ -1431,6 +1499,17 @@ scan_number_done: } while (current == ' ' or current == '\t' or current == '\n' or current == '\r'); + // ignore comments + if (ignore_comments and current == '/') + { + if (not scan_comment()) + { + error_message = "invalid comment"; + return token_type::parse_error; + } + get(); + } + switch (current) { // structural characters @@ -1499,6 +1578,9 @@ scan_number_done: /// input adapter InputAdapterType ia; + /// whether comments should be ignored (true) or signaled as errors (false) + const bool ignore_comments = false; + /// the current character char_int_type current = std::char_traits::eof(); diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index cc822a54..82435fa7 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -8179,8 +8179,11 @@ class lexer : public lexer_base public: using token_type = typename lexer_base::token_type; - explicit lexer(InputAdapterType&& adapter) - : ia(std::move(adapter)), 
decimal_point_char(static_cast(get_decimal_point())) {} + explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false) + : ia(std::move(adapter)) + , ignore_comments(ignore_comments_) + , decimal_point_char(static_cast(get_decimal_point())) + {} // delete because of pointer members lexer(const lexer&) = delete; @@ -8198,7 +8201,7 @@ class lexer : public lexer_base JSON_HEDLEY_PURE static char get_decimal_point() noexcept { - const auto loc = localeconv(); + const auto* loc = localeconv(); assert(loc != nullptr); return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point); } @@ -8893,6 +8896,71 @@ class lexer : public lexer_base } } + /*! + * @brief scan a comment + * @return whether comment could be scanned successfully + */ + bool scan_comment() + { + // remember character after '/' to distinguish comment types + const auto comment_char = get(); + + // expect // or /* to start a comment + if (comment_char != '/' and comment_char != '*') + { + return false; + } + + while (true) + { + switch (get()) + { + // EOF inside a /* comment is an error, in // it is OK + case std::char_traits::eof(): + case '\0': + { + return comment_char == '/'; + } + + // a newline ends the // comment + case '\n': + case '\r': + { + if (comment_char == '/') + { + return true; + } + break; + } + + // */ ends the /* comment + case '*': + { + if (comment_char == '*') + { + switch (get()) + { + case '/': + { + return true; + } + + default: + { + unget(); + break; + } + } + } + break; + } + + default: + break; + } + } + } + JSON_HEDLEY_NON_NULL(2) static void strtof(float& f, const char* str, char** endptr) noexcept { @@ -9498,6 +9566,17 @@ scan_number_done: } while (current == ' ' or current == '\t' or current == '\n' or current == '\r'); + // ignore comments + if (ignore_comments and current == '/') + { + if (not scan_comment()) + { + error_message = "invalid comment"; + return token_type::parse_error; + } + get(); + } + switch (current) { // structural characters @@ 
-9566,6 +9645,9 @@ scan_number_done: /// input adapter InputAdapterType ia; + /// whether comments should be ignored (true) or signaled as errors (false) + const bool ignore_comments = false; + /// the current character char_int_type current = std::char_traits::eof(); diff --git a/test/src/unit-class_lexer.cpp b/test/src/unit-class_lexer.cpp index 0e1b5378..c4423e60 100644 --- a/test/src/unit-class_lexer.cpp +++ b/test/src/unit-class_lexer.cpp @@ -127,6 +127,8 @@ TEST_CASE("lexer class") // store scan() result const auto res = scan_string(s.c_str()); + CAPTURE(s); + switch (c) { // single characters that are valid tokens @@ -161,6 +163,9 @@ TEST_CASE("lexer class") break; } + // case ('/'): + // break; + // anything else is not expected default: { @@ -179,4 +184,19 @@ TEST_CASE("lexer class") s += "\""; CHECK((scan_string(s.c_str()) == json::lexer::token_type::value_string)); } + + // SECTION("ignore comments") + // { + // CHECK((scan_string("/") == json::lexer::token_type::parse_error)); + // + // CHECK((scan_string("/!") == json::lexer::token_type::parse_error)); + // CHECK((scan_string("/*") == json::lexer::token_type::parse_error)); + // CHECK((scan_string("/**") == json::lexer::token_type::parse_error)); + // + // CHECK((scan_string("//") == json::lexer::token_type::end_of_input)); + // CHECK((scan_string("/**/") == json::lexer::token_type::end_of_input)); + // CHECK((scan_string("/** /") == json::lexer::token_type::parse_error)); + // + // CHECK((scan_string("/***/") == json::lexer::token_type::end_of_input)); + // } } From e9bfcf72550c7ba99b5cdd5c0b1e5038c1bcc4ae Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 17 Jun 2020 14:59:47 +0200 Subject: [PATCH 2/6] :zap: improve comment parsing --- include/nlohmann/detail/input/lexer.hpp | 72 +++++++++++-------------- single_include/nlohmann/json.hpp | 72 +++++++++++-------------- 2 files changed, 64 insertions(+), 80 deletions(-) diff --git a/include/nlohmann/detail/input/lexer.hpp 
b/include/nlohmann/detail/input/lexer.hpp index 00af0ce2..eab64f40 100644 --- a/include/nlohmann/detail/input/lexer.hpp +++ b/include/nlohmann/detail/input/lexer.hpp @@ -835,62 +835,54 @@ class lexer : public lexer_base */ bool scan_comment() { - // remember character after '/' to distinguish comment types - const auto comment_char = get(); - - // expect // or /* to start a comment - if (comment_char != '/' and comment_char != '*') + switch (get()) { - return false; - } - - while (true) - { - switch (get()) + case '/': { - // EOF inside a /* comment is an error, in // it is OK - case std::char_traits::eof(): - case '\0': + while (true) { - return comment_char == '/'; - } - - // a newline ends the // comment - case '\n': - case '\r': - { - if (comment_char == '/') + switch (get()) { - return true; + case '\n': + case '\r': + return true; + + default: + break; } - break; } + } - // */ ends the /* comment - case '*': + case '*': + { + while (true) { - if (comment_char == '*') + switch (get()) { - switch (get()) - { - case '/': - { - return true; - } + case std::char_traits::eof(): + case '\0': + return false; - default: + case '*': + { + switch (get()) { - unget(); - break; + case '/': + return true; + + default: + { + unget(); + break; + } } } } - break; } - - default: - break; } + + default: + return false; } } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 82435fa7..bdd97a14 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -8902,62 +8902,54 @@ class lexer : public lexer_base */ bool scan_comment() { - // remember character after '/' to distinguish comment types - const auto comment_char = get(); - - // expect // or /* to start a comment - if (comment_char != '/' and comment_char != '*') + switch (get()) { - return false; - } - - while (true) - { - switch (get()) + case '/': { - // EOF inside a /* comment is an error, in // it is OK - case std::char_traits::eof(): - case '\0': + while 
(true) { - return comment_char == '/'; - } - - // a newline ends the // comment - case '\n': - case '\r': - { - if (comment_char == '/') + switch (get()) { - return true; + case '\n': + case '\r': + return true; + + default: + break; } - break; } + } - // */ ends the /* comment - case '*': + case '*': + { + while (true) { - if (comment_char == '*') + switch (get()) { - switch (get()) - { - case '/': - { - return true; - } + case std::char_traits::eof(): + case '\0': + return false; - default: + case '*': + { + switch (get()) { - unget(); - break; + case '/': + return true; + + default: + { + unget(); + break; + } } } } - break; } - - default: - break; } + + default: + return false; } } From 74520d8bb0aa62374e5c5465b5b0f3b43d75d956 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 17 Jun 2020 22:03:14 +0200 Subject: [PATCH 3/6] :construction: extend API --- include/nlohmann/detail/input/lexer.hpp | 8 +++ include/nlohmann/detail/input/parser.hpp | 7 ++- include/nlohmann/json.hpp | 63 ++++++++++++------- single_include/nlohmann/json.hpp | 78 +++++++++++++++++------- test/src/unit-class_lexer.cpp | 58 ++++++++++++------ 5 files changed, 148 insertions(+), 66 deletions(-) diff --git a/include/nlohmann/detail/input/lexer.hpp b/include/nlohmann/detail/input/lexer.hpp index eab64f40..d5e243e6 100644 --- a/include/nlohmann/detail/input/lexer.hpp +++ b/include/nlohmann/detail/input/lexer.hpp @@ -837,6 +837,7 @@ class lexer : public lexer_base { switch (get()) { + // single-line comments skip input until a newline or EOF is read case '/': { while (true) @@ -845,6 +846,8 @@ class lexer : public lexer_base { case '\n': case '\r': + case std::char_traits::eof(): + case '\0': return true; default: @@ -853,6 +856,7 @@ class lexer : public lexer_base } } + // multi-line comments skip input until */ is read case '*': { while (true) @@ -877,10 +881,14 @@ class lexer : public lexer_base } } } + + default: + break; } } } + // unexpected character after reading '/' default: return 
false; } diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index 0546b88c..c79b492a 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -63,8 +63,11 @@ class parser /// a parser reading from an input adapter explicit parser(InputAdapterType&& adapter, const parser_callback_t cb = nullptr, - const bool allow_exceptions_ = true) - : callback(cb), m_lexer(std::move(adapter)), allow_exceptions(allow_exceptions_) + const bool allow_exceptions_ = true, + const bool skip_comments = false) + : callback(cb) + , m_lexer(std::move(adapter), skip_comments) + , allow_exceptions(allow_exceptions_) { // read first token get_token(); diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index 409a6e79..8698a9bb 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -196,10 +196,12 @@ class basic_json static ::nlohmann::detail::parser parser( InputAdapterType adapter, detail::parser_callback_tcb = nullptr, - bool allow_exceptions = true + const bool allow_exceptions = true, + const bool ignore_comments = false ) { - return ::nlohmann::detail::parser(std::move(adapter), std::move(cb), allow_exceptions); + return ::nlohmann::detail::parser(std::move(adapter), + std::move(cb), allow_exceptions, ignore_comments); } using primitive_iterator_t = ::nlohmann::detail::primitive_iterator_t; @@ -6563,6 +6565,8 @@ class basic_json (optional) @param[in] allow_exceptions whether to throw exceptions in case of a parse error (optional, true by default) + @param[in] ignore_comments whether comments should be ignored (true) or + yield a parse error (false); (optional, false by default) @return deserialized JSON value; in case of a parse error and @a allow_exceptions set to `false`, the return value will be @@ -6591,16 +6595,18 @@ class basic_json @liveexample{The example below demonstrates the `parse()` function reading from a contiguous
container.,parse__contiguouscontainer__parser_callback_t} - @since version 2.0.3 (contiguous containers) + @since version 2.0.3 (contiguous containers); version 3.9.0 allowed to + ignore comments. */ template JSON_HEDLEY_WARN_UNUSED_RESULT static basic_json parse(InputType&& i, const parser_callback_t cb = nullptr, - const bool allow_exceptions = true) + const bool allow_exceptions = true, + const bool ignore_comments = false) { basic_json result; - parser(detail::input_adapter(std::forward(i)), cb, allow_exceptions).parse(true, result); + parser(detail::input_adapter(std::forward(i)), cb, allow_exceptions, ignore_comments).parse(true, result); return result; } @@ -6617,6 +6623,8 @@ class basic_json (optional) @param[in] allow_exceptions whether to throw exceptions in case of a parse error (optional, true by default) + @param[in] ignore_comments whether comments should be ignored (true) or + yield a parse error (false); (optional, false by default) @return deserialized JSON value; in case of a parse error and @a allow_exceptions set to `false`, the return value will be @@ -6632,10 +6640,11 @@ class basic_json static basic_json parse(IteratorType first, IteratorType last, const parser_callback_t cb = nullptr, - const bool allow_exceptions = true) + const bool allow_exceptions = true, + const bool ignore_comments = false) { basic_json result; - parser(detail::input_adapter(std::move(first), std::move(last)), cb, allow_exceptions).parse(true, result); + parser(detail::input_adapter(std::move(first), std::move(last)), cb, allow_exceptions, ignore_comments).parse(true, result); return result; } @@ -6643,10 +6652,11 @@ class basic_json JSON_HEDLEY_DEPRECATED_FOR(3.8.0, parse(ptr, ptr + len)) static basic_json parse(detail::span_input_adapter&& i, const parser_callback_t cb = nullptr, - const bool allow_exceptions = true) + const bool allow_exceptions = true, + const bool ignore_comments = false) { basic_json result; - parser(i.get(), cb, allow_exceptions).parse(true,
result); + parser(i.get(), cb, allow_exceptions, ignore_comments).parse(true, result); return result; } @@ -6666,6 +6676,8 @@ class basic_json iterators. @param[in] i input to read from + @param[in] ignore_comments whether comments should be ignored (true) or + yield a parse error (false); (optional, false by default) @return Whether the input read from @a i is valid JSON. @@ -6678,22 +6690,25 @@ class basic_json from a string.,accept__string} */ template - static bool accept(InputType&& i) + static bool accept(InputType&& i, + const bool ignore_comments = false) { - return parser(detail::input_adapter(std::forward(i))).accept(true); + return parser(detail::input_adapter(std::forward(i)), nullptr, false, ignore_comments).accept(true); } template - static bool accept(IteratorType first, IteratorType last) + static bool accept(IteratorType first, IteratorType last, + const bool ignore_comments = false) { - return parser(detail::input_adapter(std::move(first), std::move(last))).accept(true); + return parser(detail::input_adapter(std::move(first), std::move(last)), nullptr, false, ignore_comments).accept(true); } JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DEPRECATED_FOR(3.8.0, accept(ptr, ptr + len)) - static bool accept(detail::span_input_adapter&& i) + static bool accept(detail::span_input_adapter&& i, + const bool ignore_comments = false) { - return parser(i.get()).accept(true); + return parser(i.get(), nullptr, false, ignore_comments).accept(true); } /*! @@ -6713,6 +6728,9 @@ class basic_json @param[in,out] sax SAX event listener @param[in] format the format to parse (JSON, CBOR, MessagePack, or UBJSON) @param[in] strict whether the input has to be consumed completely + @param[in] ignore_comments whether comments should be ignored (true) or + yield a parse error (false); (optional, false by default); only applies to + the JSON file format.
@return return value of the last processed SAX event @@ -6737,11 +6755,12 @@ class basic_json JSON_HEDLEY_NON_NULL(2) static bool sax_parse(InputType&& i, SAX* sax, input_format_t format = input_format_t::json, - const bool strict = true) + const bool strict = true, + const bool ignore_comments = false) { auto ia = detail::input_adapter(std::forward(i)); return format == input_format_t::json - ? parser(std::move(ia)).sax_parse(sax, strict) + ? parser(std::move(ia), nullptr, true, ignore_comments).sax_parse(sax, strict) : detail::binary_reader(std::move(ia)).sax_parse(format, sax, strict); } @@ -6749,11 +6768,12 @@ class basic_json JSON_HEDLEY_NON_NULL(3) static bool sax_parse(IteratorType first, IteratorType last, SAX* sax, input_format_t format = input_format_t::json, - const bool strict = true) + const bool strict = true, + const bool ignore_comments = false) { auto ia = detail::input_adapter(std::move(first), std::move(last)); return format == input_format_t::json - ? parser(std::move(ia)).sax_parse(sax, strict) + ? parser(std::move(ia), nullptr, true, ignore_comments).sax_parse(sax, strict) : detail::binary_reader(std::move(ia)).sax_parse(format, sax, strict); } @@ -6762,11 +6782,12 @@ class basic_json JSON_HEDLEY_NON_NULL(2) static bool sax_parse(detail::span_input_adapter&& i, SAX* sax, input_format_t format = input_format_t::json, - const bool strict = true) + const bool strict = true, + const bool ignore_comments = false) { auto ia = i.get(); return format == input_format_t::json - ? parser(std::move(ia)).sax_parse(sax, strict) + ? 
parser(std::move(ia), nullptr, true, ignore_comments).sax_parse(sax, strict) : detail::binary_reader(std::move(ia)).sax_parse(format, sax, strict); } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index bdd97a14..099fdd8e 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -8904,6 +8904,7 @@ class lexer : public lexer_base { switch (get()) { + // single-line comments skip input until a newline or EOF is read case '/': { while (true) @@ -8912,6 +8913,8 @@ class lexer : public lexer_base { case '\n': case '\r': + case std::char_traits::eof(): + case '\0': return true; default: @@ -8920,6 +8923,7 @@ class lexer : public lexer_base } } + // multi-line comments skip input until */ is read case '*': { while (true) @@ -8944,10 +8948,14 @@ class lexer : public lexer_base } } } + + default: + break; } } } + // unexpected character after reading '/' default: return false; } @@ -9742,8 +9750,11 @@ class parser /// a parser reading from an input adapter explicit parser(InputAdapterType&& adapter, const parser_callback_t cb = nullptr, - const bool allow_exceptions_ = true) - : callback(cb), m_lexer(std::move(adapter)), allow_exceptions(allow_exceptions_) + const bool allow_exceptions_ = true, + const bool skip_comments = false) + : callback(cb) + , m_lexer(std::move(adapter), skip_comments) + , allow_exceptions(allow_exceptions_) { // read first token get_token(); @@ -16051,10 +16062,12 @@ class basic_json static ::nlohmann::detail::parser parser( InputAdapterType adapter, detail::parser_callback_tcb = nullptr, - bool allow_exceptions = true + const bool allow_exceptions = true, + const bool ignore_comments = false ) { - return ::nlohmann::detail::parser(std::move(adapter), std::move(cb), allow_exceptions); + return ::nlohmann::detail::parser(std::move(adapter), + std::move(cb), allow_exceptions, ignore_comments); } using primitive_iterator_t = ::nlohmann::detail::primitive_iterator_t; @@ -22418,6 +22431,8 @@ 
class basic_json (optional) @param[in] allow_exceptions whether to throw exceptions in case of a parse error (optional, true by default) + @param[in] ignore_comments whether comments should be ignored (true) or + yield a parse error (false); (optional, false by default) @return deserialized JSON value; in case of a parse error and @a allow_exceptions set to `false`, the return value will be @@ -22446,16 +22461,18 @@ class basic_json @liveexample{The example below demonstrates the `parse()` function reading from a contiguous container.,parse__contiguouscontainer__parser_callback_t} - @since version 2.0.3 (contiguous containers) + @since version 2.0.3 (contiguous containers); version 3.9.0 allowed to + ignore comments. */ template JSON_HEDLEY_WARN_UNUSED_RESULT static basic_json parse(InputType&& i, const parser_callback_t cb = nullptr, - const bool allow_exceptions = true) + const bool allow_exceptions = true, + const bool ignore_comments = false) { basic_json result; - parser(detail::input_adapter(std::forward(i)), cb, allow_exceptions).parse(true, result); + parser(detail::input_adapter(std::forward(i)), cb, allow_exceptions, ignore_comments).parse(true, result); return result; } @@ -22472,6 +22489,8 @@ class basic_json (optional) @param[in] allow_exceptions whether to throw exceptions in case of a parse error (optional, true by default) + @param[in] ignore_comments whether comments should be ignored (true) or + yield a parse error (false); (optional, false by default) @return deserialized JSON value; in case of a parse error and @a allow_exceptions set to `false`, the return value will be @@ -22487,10 +22506,11 @@ class basic_json static basic_json parse(IteratorType first, IteratorType last, const parser_callback_t cb = nullptr, - const bool allow_exceptions = true) + const bool allow_exceptions = true, + const bool ignore_comments = false) { basic_json result; - parser(detail::input_adapter(std::move(first), std::move(last)), cb, allow_exceptions).parse(true,
result); + parser(detail::input_adapter(std::move(first), std::move(last)), cb, allow_exceptions, ignore_comments).parse(true, result); return result; } @@ -22498,10 +22518,11 @@ class basic_json JSON_HEDLEY_DEPRECATED_FOR(3.8.0, parse(ptr, ptr + len)) static basic_json parse(detail::span_input_adapter&& i, const parser_callback_t cb = nullptr, - const bool allow_exceptions = true) + const bool allow_exceptions = true, + const bool ignore_comments = false) { basic_json result; - parser(i.get(), cb, allow_exceptions).parse(true, result); + parser(i.get(), cb, allow_exceptions, ignore_comments).parse(true, result); return result; } @@ -22521,6 +22542,8 @@ class basic_json iterators. @param[in] i input to read from + @param[in] ignore_comments whether comments should be ignored (true) or + yield a parse error (false); (optional, false by default) @return Whether the input read from @a i is valid JSON. @@ -22533,22 +22556,25 @@ class basic_json from a string.,accept__string} */ template - static bool accept(InputType&& i) + static bool accept(InputType&& i, + const bool ignore_comments = false) { - return parser(detail::input_adapter(std::forward(i))).accept(true); + return parser(detail::input_adapter(std::forward(i)), nullptr, false, ignore_comments).accept(true); } template - static bool accept(IteratorType first, IteratorType last) + static bool accept(IteratorType first, IteratorType last, + const bool ignore_comments = false) { - return parser(detail::input_adapter(std::move(first), std::move(last))).accept(true); + return parser(detail::input_adapter(std::move(first), std::move(last)), nullptr, false, ignore_comments).accept(true); } JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DEPRECATED_FOR(3.8.0, accept(ptr, ptr + len)) - static bool accept(detail::span_input_adapter&& i) + static bool accept(detail::span_input_adapter&& i, + const bool ignore_comments = false) { - return parser(i.get()).accept(true); + return parser(i.get(), nullptr, false,
ignore_comments).accept(true); } /*! @@ -22568,6 +22594,9 @@ class basic_json @param[in,out] sax SAX event listener @param[in] format the format to parse (JSON, CBOR, MessagePack, or UBJSON) @param[in] strict whether the input has to be consumed completely + @param[in] ignore_comments whether comments should be ignored (true) or + yield a parse error (false); (optional, false by default); only applies to + the JSON file format. @return return value of the last processed SAX event @@ -22592,11 +22621,12 @@ class basic_json JSON_HEDLEY_NON_NULL(2) static bool sax_parse(InputType&& i, SAX* sax, input_format_t format = input_format_t::json, - const bool strict = true) + const bool strict = true, + const bool ignore_comments = false) { auto ia = detail::input_adapter(std::forward(i)); return format == input_format_t::json - ? parser(std::move(ia)).sax_parse(sax, strict) + ? parser(std::move(ia), nullptr, true, ignore_comments).sax_parse(sax, strict) : detail::binary_reader(std::move(ia)).sax_parse(format, sax, strict); } @@ -22604,11 +22634,12 @@ class basic_json JSON_HEDLEY_NON_NULL(3) static bool sax_parse(IteratorType first, IteratorType last, SAX* sax, input_format_t format = input_format_t::json, - const bool strict = true) + const bool strict = true, + const bool ignore_comments = false) { auto ia = detail::input_adapter(std::move(first), std::move(last)); return format == input_format_t::json - ? parser(std::move(ia)).sax_parse(sax, strict) + ? parser(std::move(ia), nullptr, true, ignore_comments).sax_parse(sax, strict) : detail::binary_reader(std::move(ia)).sax_parse(format, sax, strict); } @@ -22617,11 +22648,12 @@ class basic_json JSON_HEDLEY_NON_NULL(2) static bool sax_parse(detail::span_input_adapter&& i, SAX* sax, input_format_t format = input_format_t::json, - const bool strict = true) + const bool strict = true, + const bool ignore_comments = false) { auto ia = i.get(); return format == input_format_t::json - ?
parser(std::move(ia)).sax_parse(sax, strict) + ? parser(std::move(ia), nullptr, true, ignore_comments).sax_parse(sax, strict) : detail::binary_reader(std::move(ia)).sax_parse(format, sax, strict); } diff --git a/test/src/unit-class_lexer.cpp b/test/src/unit-class_lexer.cpp index c4423e60..aee4703f 100644 --- a/test/src/unit-class_lexer.cpp +++ b/test/src/unit-class_lexer.cpp @@ -37,11 +37,11 @@ using nlohmann::json; namespace { // shortcut to scan a string literal -json::lexer::token_type scan_string(const char* s); -json::lexer::token_type scan_string(const char* s) +json::lexer::token_type scan_string(const char* s, const bool ignore_comments = false); +json::lexer::token_type scan_string(const char* s, const bool ignore_comments) { auto ia = nlohmann::detail::input_adapter(s); - return nlohmann::detail::lexer(std::move(ia)).scan(); + return nlohmann::detail::lexer(std::move(ia), ignore_comments).scan(); } } @@ -163,9 +163,6 @@ TEST_CASE("lexer class") break; } - // case ('/'): - // break; - // anything else is not expected default: { @@ -185,18 +182,39 @@ TEST_CASE("lexer class") CHECK((scan_string(s.c_str()) == json::lexer::token_type::value_string)); } - // SECTION("ignore comments") - // { - // CHECK((scan_string("/") == json::lexer::token_type::parse_error)); - // - // CHECK((scan_string("/!") == json::lexer::token_type::parse_error)); - // CHECK((scan_string("/*") == json::lexer::token_type::parse_error)); - // CHECK((scan_string("/**") == json::lexer::token_type::parse_error)); - // - // CHECK((scan_string("//") == json::lexer::token_type::end_of_input)); - // CHECK((scan_string("/**/") == json::lexer::token_type::end_of_input)); - // CHECK((scan_string("/** /") == json::lexer::token_type::parse_error)); - // - // CHECK((scan_string("/***/") == json::lexer::token_type::end_of_input)); - // } + SECTION("fail on comments") + { + CHECK((scan_string("/", false) == json::lexer::token_type::parse_error)); + + CHECK((scan_string("/!", false) == 
json::lexer::token_type::parse_error)); + CHECK((scan_string("/*", false) == json::lexer::token_type::parse_error)); + CHECK((scan_string("/**", false) == json::lexer::token_type::parse_error)); + + CHECK((scan_string("//", false) == json::lexer::token_type::parse_error)); + CHECK((scan_string("/**/", false) == json::lexer::token_type::parse_error)); + CHECK((scan_string("/** /", false) == json::lexer::token_type::parse_error)); + + CHECK((scan_string("/***/", false) == json::lexer::token_type::parse_error)); + CHECK((scan_string("/* true */", false) == json::lexer::token_type::parse_error)); + CHECK((scan_string("/*/**/", false) == json::lexer::token_type::parse_error)); + CHECK((scan_string("/*/* */", false) == json::lexer::token_type::parse_error)); + } + + SECTION("ignore comments") + { + CHECK((scan_string("/", true) == json::lexer::token_type::parse_error)); + + CHECK((scan_string("/!", true) == json::lexer::token_type::parse_error)); + CHECK((scan_string("/*", true) == json::lexer::token_type::parse_error)); + CHECK((scan_string("/**", true) == json::lexer::token_type::parse_error)); + + CHECK((scan_string("//", true) == json::lexer::token_type::end_of_input)); + CHECK((scan_string("/**/", true) == json::lexer::token_type::end_of_input)); + CHECK((scan_string("/** /", true) == json::lexer::token_type::parse_error)); + + CHECK((scan_string("/***/", true) == json::lexer::token_type::end_of_input)); + CHECK((scan_string("/* true */", true) == json::lexer::token_type::end_of_input)); + CHECK((scan_string("/*/**/", true) == json::lexer::token_type::end_of_input)); + CHECK((scan_string("/*/* */", true) == json::lexer::token_type::end_of_input)); + } } From 0585ecc56b210e045e639c41474cc72c475c8b76 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Fri, 19 Jun 2020 13:10:35 +0200 Subject: [PATCH 4/6] :white_check_mark: add tests for comment skipping --- include/nlohmann/detail/input/lexer.hpp | 8 ++++- single_include/nlohmann/json.hpp | 8 ++++- 
test/src/unit-class_parser.cpp | 45 ++++++++++++++++++++++++- 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/include/nlohmann/detail/input/lexer.hpp b/include/nlohmann/detail/input/lexer.hpp index d5e243e6..9dba2972 100644 --- a/include/nlohmann/detail/input/lexer.hpp +++ b/include/nlohmann/detail/input/lexer.hpp @@ -1507,7 +1507,13 @@ scan_number_done: error_message = "invalid comment"; return token_type::parse_error; } - get(); + + // skip following whitespace + do + { + get(); + } + while (current == ' ' or current == '\t' or current == '\n' or current == '\r'); } switch (current) diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 099fdd8e..c341d40d 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -9574,7 +9574,13 @@ scan_number_done: error_message = "invalid comment"; return token_type::parse_error; } - get(); + + // skip following whitespace + do + { + get(); + } + while (current == ' ' or current == '\t' or current == '\n' or current == '\r'); } switch (current) diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index 1912094a..de5c9638 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -224,6 +224,7 @@ class SaxCountdown : public nlohmann::json::json_sax_t json parser_helper(const std::string& s); bool accept_helper(const std::string& s); +void comments_helper(const std::string& s); json parser_helper(const std::string& s) { @@ -241,6 +242,8 @@ json parser_helper(const std::string& s) json::sax_parse(s, &sdp); CHECK(j_sax == j); + comments_helper(s); + return j; } @@ -275,11 +278,51 @@ bool accept_helper(const std::string& s) // 6. check if this approach came to the same result CHECK(ok_noexcept == ok_noexcept_cb); - // 7. return result + // 7. check if comments are properly ignored + if (ok_accept) + { + comments_helper(s); + } + + // 8. 
return result return ok_accept; } + +void comments_helper(const std::string& s) +{ + json _; + + // parse/accept with default parser + CHECK_NOTHROW(_ = json::parse(s)); + CHECK(json::accept(s)); + + // parse/accept while skipping comments + CHECK_NOTHROW(_ = json::parse(s, nullptr, false, true)); + CHECK(json::accept(s, true)); + + std::vector json_with_comments; + + // start with a comment + json_with_comments.push_back(std::string("// this is a comment\n") + s); + json_with_comments.push_back(std::string("/* this is a comment */") + s); + // end with a comment + json_with_comments.push_back(s + "// this is a comment"); + json_with_comments.push_back(s + "/* this is a comment */"); + + // check all strings + for (const auto& json_with_comment : json_with_comments) + { + CAPTURE(json_with_comment) + CHECK_THROWS_AS(_ = json::parse(json_with_comment), json::parse_error); + CHECK(not json::accept(json_with_comment)); + + CHECK_NOTHROW(_ = json::parse(json_with_comment, nullptr, true, true)); + CHECK(json::accept(json_with_comment, true)); + } } +} // namespace + TEST_CASE("parser class") { SECTION("parse") From e22ce4506564f3630e4cd63dc0795115df90be71 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 21 Jun 2020 13:28:00 +0200 Subject: [PATCH 5/6] :children_crossing: improve diagnostics --- include/nlohmann/detail/input/lexer.hpp | 7 ++++++- single_include/nlohmann/json.hpp | 7 ++++++- test/src/unit-class_lexer.cpp | 25 +++++++++++++++++++++++++ test/src/unit-class_parser.cpp | 6 ++++++ 4 files changed, 43 insertions(+), 2 deletions(-) diff --git a/include/nlohmann/detail/input/lexer.hpp b/include/nlohmann/detail/input/lexer.hpp index 9dba2972..580b1c22 100644 --- a/include/nlohmann/detail/input/lexer.hpp +++ b/include/nlohmann/detail/input/lexer.hpp @@ -865,7 +865,10 @@ class lexer : public lexer_base { case std::char_traits::eof(): case '\0': + { + error_message = "invalid comment; missing closing '*/'"; return false; + } case '*': { @@ -890,7 +893,10 @@ 
class lexer : public lexer_base // unexpected character after reading '/' default: + { + error_message = "invalid comment; expecting '/' or '*' after '/'"; return false; + } } } @@ -1504,7 +1510,6 @@ scan_number_done: { if (not scan_comment()) { - error_message = "invalid comment"; return token_type::parse_error; } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index f4fe8642..cdc3de09 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -8932,7 +8932,10 @@ class lexer : public lexer_base { case std::char_traits::eof(): case '\0': + { + error_message = "invalid comment; missing closing '*/'"; return false; + } case '*': { @@ -8957,7 +8960,10 @@ class lexer : public lexer_base // unexpected character after reading '/' default: + { + error_message = "invalid comment; expecting '/' or '*' after '/'"; return false; + } } } @@ -9571,7 +9577,6 @@ scan_number_done: { if (not scan_comment()) { - error_message = "invalid comment"; return token_type::parse_error; } diff --git a/test/src/unit-class_lexer.cpp b/test/src/unit-class_lexer.cpp index aee4703f..d8304ccf 100644 --- a/test/src/unit-class_lexer.cpp +++ b/test/src/unit-class_lexer.cpp @@ -45,6 +45,15 @@ json::lexer::token_type scan_string(const char* s, const bool ignore_comments) } } +std::string get_error_message(const char* s, const bool ignore_comments = false); +std::string get_error_message(const char* s, const bool ignore_comments) +{ + auto ia = nlohmann::detail::input_adapter(s); + auto lexer = nlohmann::detail::lexer(std::move(ia), ignore_comments); + lexer.scan(); + return lexer.get_error_message(); +} + TEST_CASE("lexer class") { SECTION("scan") @@ -185,32 +194,48 @@ TEST_CASE("lexer class") SECTION("fail on comments") { CHECK((scan_string("/", false) == json::lexer::token_type::parse_error)); + CHECK(get_error_message("/", false) == "invalid literal"); CHECK((scan_string("/!", false) == json::lexer::token_type::parse_error)); + 
CHECK(get_error_message("/!", false) == "invalid literal"); CHECK((scan_string("/*", false) == json::lexer::token_type::parse_error)); + CHECK(get_error_message("/*", false) == "invalid literal"); CHECK((scan_string("/**", false) == json::lexer::token_type::parse_error)); + CHECK(get_error_message("/**", false) == "invalid literal"); CHECK((scan_string("//", false) == json::lexer::token_type::parse_error)); + CHECK(get_error_message("//", false) == "invalid literal"); CHECK((scan_string("/**/", false) == json::lexer::token_type::parse_error)); + CHECK(get_error_message("/**/", false) == "invalid literal"); CHECK((scan_string("/** /", false) == json::lexer::token_type::parse_error)); + CHECK(get_error_message("/** /", false) == "invalid literal"); CHECK((scan_string("/***/", false) == json::lexer::token_type::parse_error)); + CHECK(get_error_message("/***/", false) == "invalid literal"); CHECK((scan_string("/* true */", false) == json::lexer::token_type::parse_error)); + CHECK(get_error_message("/* true */", false) == "invalid literal"); CHECK((scan_string("/*/**/", false) == json::lexer::token_type::parse_error)); + CHECK(get_error_message("/*/**/", false) == "invalid literal"); CHECK((scan_string("/*/* */", false) == json::lexer::token_type::parse_error)); + CHECK(get_error_message("/*/* */", false) == "invalid literal"); } SECTION("ignore comments") { CHECK((scan_string("/", true) == json::lexer::token_type::parse_error)); + CHECK(get_error_message("/", true) == "invalid comment; expecting '/' or '*' after '/'"); CHECK((scan_string("/!", true) == json::lexer::token_type::parse_error)); + CHECK(get_error_message("/!", true) == "invalid comment; expecting '/' or '*' after '/'"); CHECK((scan_string("/*", true) == json::lexer::token_type::parse_error)); + CHECK(get_error_message("/*", true) == "invalid comment; missing closing '*/'"); CHECK((scan_string("/**", true) == json::lexer::token_type::parse_error)); + CHECK(get_error_message("/**", true) == "invalid comment; 
missing closing '*/'"); CHECK((scan_string("//", true) == json::lexer::token_type::end_of_input)); CHECK((scan_string("/**/", true) == json::lexer::token_type::end_of_input)); CHECK((scan_string("/** /", true) == json::lexer::token_type::parse_error)); + CHECK(get_error_message("/** /", true) == "invalid comment; missing closing '*/'"); CHECK((scan_string("/***/", true) == json::lexer::token_type::end_of_input)); CHECK((scan_string("/* true */", true) == json::lexer::token_type::end_of_input)); diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index de5c9638..da16ffca 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -1877,4 +1877,10 @@ TEST_CASE("parser class") } } } + + SECTION("error messages for comments") + { + CHECK_THROWS_WITH_AS(json::parse("/a", nullptr, true, true), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid comment; expecting '/' or '*' after '/'; last read: '/a'", json::parse_error); + CHECK_THROWS_WITH_AS(json::parse("/*", nullptr, true, true), "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid comment; missing closing '*/'; last read: '/*'", json::parse_error); + } } From 65e8ee985ae31e73223e6f722c26d6a1fb0563c8 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 22 Jun 2020 08:59:03 +0200 Subject: [PATCH 6/6] :hammer: clean up --- README.md | 8 ++--- include/nlohmann/detail/input/lexer.hpp | 21 +++++++------ include/nlohmann/json.hpp | 21 +++++++------ single_include/nlohmann/json.hpp | 42 ++++++++++++++----------- 4 files changed, 48 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 42d057a3..0699bdc3 100644 --- a/README.md +++ b/README.md @@ -1508,7 +1508,7 @@ The library supports **Unicode input** as follows: ### Comments in JSON -This library does not support comments. 
It does so for three reasons: +This library does not support comments by default. It does so for three reasons: 1. Comments are not part of the [JSON specification](https://tools.ietf.org/html/rfc8259). You may argue that `//` or `/* */` are allowed in JavaScript, but JSON is not JavaScript. 2. This was not an oversight: Douglas Crockford [wrote on this](https://plus.google.com/118095276221607585885/posts/RK8qyGVaGSr) in May 2012: @@ -1519,11 +1519,7 @@ This library does not support comments. It does so for three reasons: 3. It is dangerous for interoperability if some libraries would add comment support while others don't. Please check [The Harmful Consequences of the Robustness Principle](https://tools.ietf.org/html/draft-iab-protocol-maintenance-01) on this. -This library will not support comments in the future. If you wish to use comments, I see three options: - -1. Strip comments before using this library. -2. Use a different JSON library with comment support. -3. Use a format that natively supports comments (e.g., YAML or JSON5). +However, you can set parameter `ignore_comments` to true in the `parse` function to ignore `//` or `/* */` comments. Comments will then be treated as whitespace. 
### Order of object keys diff --git a/include/nlohmann/detail/input/lexer.hpp b/include/nlohmann/detail/input/lexer.hpp index 580b1c22..8042f3c4 100644 --- a/include/nlohmann/detail/input/lexer.hpp +++ b/include/nlohmann/detail/input/lexer.hpp @@ -1489,6 +1489,15 @@ scan_number_done: return true; } + void skip_whitespace() + { + do + { + get(); + } + while (current == ' ' or current == '\t' or current == '\n' or current == '\r'); + } + token_type scan() { // initially, skip the BOM @@ -1499,11 +1508,7 @@ scan_number_done: } // read next character and ignore whitespace - do - { - get(); - } - while (current == ' ' or current == '\t' or current == '\n' or current == '\r'); + skip_whitespace(); // ignore comments if (ignore_comments and current == '/') @@ -1514,11 +1519,7 @@ scan_number_done: } // skip following whitespace - do - { - get(); - } - while (current == ' ' or current == '\t' or current == '\n' or current == '\r'); + skip_whitespace(); } switch (current) diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index f4fa590c..cc7e3fbc 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -6565,8 +6565,9 @@ class basic_json (optional) @param[in] allow_exceptions whether to throw exceptions in case of a parse error (optional, true by default) - @param[in] ignore_comments whether comments should be ignored (true) or - yield a parse error (true); (optional, false by default) + @param[in] ignore_comments whether comments should be ignored and treated + like whitespace (true) or yield a parse error (false); (optional, false by + default) @return deserialized JSON value; in case of a parse error and @a allow_exceptions set to `false`, the return value will be @@ -6623,8 +6624,9 @@ class basic_json (optional) @param[in] allow_exceptions whether to throw exceptions in case of a parse error (optional, true by default) - @param[in] ignore_comments whether comments should be ignored (true) or - yield a parse error (true); (optional, false 
by default) + @param[in] ignore_comments whether comments should be ignored and treated + like whitespace (true) or yield a parse error (false); (optional, false by + default) @return deserialized JSON value; in case of a parse error and @a allow_exceptions set to `false`, the return value will be @@ -6676,8 +6678,9 @@ class basic_json iterators. @param[in] i input to read from - @param[in] ignore_comments whether comments should be ignored (true) or - yield a parse error (true); (optional, false by default) + @param[in] ignore_comments whether comments should be ignored and treated + like whitespace (true) or yield a parse error (false); (optional, false by + default) @return Whether the input read from @a i is valid JSON. @@ -6728,9 +6731,9 @@ class basic_json @param[in,out] sax SAX event listener @param[in] format the format to parse (JSON, CBOR, MessagePack, or UBJSON) @param[in] strict whether the input has to be consumed completely - @param[in] ignore_comments whether comments should be ignored (true) or - yield a parse error (true); (optional, false by default); only applieds to - the JSON file format. + @param[in] ignore_comments whether comments should be ignored and treated + like whitespace (true) or yield a parse error (false); (optional, false by + default); only applies to the JSON file format. 
@return return value of the last processed SAX event diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index cdc3de09..683e2d56 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -9556,6 +9556,15 @@ scan_number_done: return true; } + void skip_whitespace() + { + do + { + get(); + } + while (current == ' ' or current == '\t' or current == '\n' or current == '\r'); + } + token_type scan() { // initially, skip the BOM @@ -9566,11 +9575,7 @@ scan_number_done: } // read next character and ignore whitespace - do - { - get(); - } - while (current == ' ' or current == '\t' or current == '\n' or current == '\r'); + skip_whitespace(); // ignore comments if (ignore_comments and current == '/') @@ -9581,11 +9586,7 @@ scan_number_done: } // skip following whitespace - do - { - get(); - } - while (current == ' ' or current == '\t' or current == '\n' or current == '\r'); + skip_whitespace(); } switch (current) @@ -22451,8 +22452,9 @@ class basic_json (optional) @param[in] allow_exceptions whether to throw exceptions in case of a parse error (optional, true by default) - @param[in] ignore_comments whether comments should be ignored (true) or - yield a parse error (true); (optional, false by default) + @param[in] ignore_comments whether comments should be ignored and treated + like whitespace (true) or yield a parse error (false); (optional, false by + default) @return deserialized JSON value; in case of a parse error and @a allow_exceptions set to `false`, the return value will be @@ -22509,8 +22511,9 @@ class basic_json (optional) @param[in] allow_exceptions whether to throw exceptions in case of a parse error (optional, true by default) - @param[in] ignore_comments whether comments should be ignored (true) or - yield a parse error (true); (optional, false by default) + @param[in] ignore_comments whether comments should be ignored and treated + like whitespace (true) or yield a parse error (false); (optional, false 
by + default) @return deserialized JSON value; in case of a parse error and @a allow_exceptions set to `false`, the return value will be @@ -22562,8 +22565,9 @@ class basic_json iterators. @param[in] i input to read from - @param[in] ignore_comments whether comments should be ignored (true) or - yield a parse error (true); (optional, false by default) + @param[in] ignore_comments whether comments should be ignored and treated + like whitespace (true) or yield a parse error (false); (optional, false by + default) @return Whether the input read from @a i is valid JSON. @@ -22614,9 +22618,9 @@ class basic_json @param[in,out] sax SAX event listener @param[in] format the format to parse (JSON, CBOR, MessagePack, or UBJSON) @param[in] strict whether the input has to be consumed completely - @param[in] ignore_comments whether comments should be ignored (true) or - yield a parse error (true); (optional, false by default); only applieds to - the JSON file format. + @param[in] ignore_comments whether comments should be ignored and treated + like whitespace (true) or yield a parse error (false); (optional, false by + default); only applies to the JSON file format. @return return value of the last processed SAX event