diff --git a/include/nlohmann/detail/input/lexer.hpp b/include/nlohmann/detail/input/lexer.hpp
index 0ff0c736..00af0ce2 100644
--- a/include/nlohmann/detail/input/lexer.hpp
+++ b/include/nlohmann/detail/input/lexer.hpp
@@ -112,8 +112,11 @@ class lexer : public lexer_base<BasicJsonType>
   public:
     using token_type = typename lexer_base<BasicJsonType>::token_type;
 
-    explicit lexer(InputAdapterType&& adapter)
-        : ia(std::move(adapter)), decimal_point_char(static_cast<char_int_type>(get_decimal_point())) {}
+    explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false)
+        : ia(std::move(adapter))
+        , ignore_comments(ignore_comments_)
+        , decimal_point_char(static_cast<char_int_type>(get_decimal_point()))
+    {}
 
     // delete because of pointer members
     lexer(const lexer&) = delete;
@@ -131,7 +134,7 @@ class lexer : public lexer_base<BasicJsonType>
     JSON_HEDLEY_PURE
     static char get_decimal_point() noexcept
     {
-        const auto loc = localeconv();
+        const auto* loc = localeconv();
         assert(loc != nullptr);
         return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point);
     }
@@ -826,6 +829,76 @@ class lexer : public lexer_base<BasicJsonType>
         }
     }
 
+    /*!
+     * @brief scan a comment
+     *
+     * Expects that the leading '/' has already been read. Consumes a
+     * single-line comment (up to a newline or the end of input) or a
+     * multi-line comment (up to and including the closing asterisk-slash).
+     *
+     * @return whether comment could be scanned successfully
+     */
+    bool scan_comment()
+    {
+        // remember character after '/' to distinguish comment types
+        const auto comment_char = get();
+
+        // expect // or /* to start a comment
+        if (comment_char != '/' and comment_char != '*')
+        {
+            return false;
+        }
+
+        while (true)
+        {
+            switch (get())
+            {
+                // EOF inside a /* comment is an error, in // it is OK
+                case std::char_traits<char_type>::eof():
+                case '\0':
+                {
+                    return comment_char == '/';
+                }
+
+                // a newline ends the // comment
+                case '\n':
+                case '\r':
+                {
+                    if (comment_char == '/')
+                    {
+                        return true;
+                    }
+                    break;
+                }
+
+                // */ ends the /* comment
+                case '*':
+                {
+                    if (comment_char == '*')
+                    {
+                        switch (get())
+                        {
+                            case '/':
+                            {
+                                return true;
+                            }
+
+                            default:
+                            {
+                                unget();
+                                break;
+                            }
+                        }
+                    }
+                    break;
+                }
+
+                default:
+                    break;
+            }
+        }
+    }
+
     JSON_HEDLEY_NON_NULL(2)
     static void strtof(float& f, const char* str, char** endptr) noexcept
     {
@@ -1431,6 +1504,24 @@ scan_number_done:
         }
         while (current == ' ' or current == '\t' or current == '\n' or current == '\r');
 
+        // ignore comments; a comment may be followed by whitespace and
+        // further comments, so repeat until a non-comment character is read
+        while (ignore_comments and current == '/')
+        {
+            if (not scan_comment())
+            {
+                error_message = "invalid comment";
+                return token_type::parse_error;
+            }
+
+            // skip whitespace following the comment
+            do
+            {
+                get();
+            }
+            while (current == ' ' or current == '\t' or current == '\n' or current == '\r');
+        }
+
         switch (current)
         {
             // structural characters
@@ -1499,6 +1590,9 @@ scan_number_done:
     /// input adapter
     InputAdapterType ia;
 
+    /// whether comments should be ignored (true) or signaled as errors (false)
+    const bool ignore_comments = false;
+
     /// the current character
     char_int_type current = std::char_traits<char_type>::eof();
 
diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp
index cc822a54..82435fa7 100644
--- a/single_include/nlohmann/json.hpp
+++ b/single_include/nlohmann/json.hpp
@@ -8179,8 +8179,11 @@ class lexer : public lexer_base<BasicJsonType>
   public:
     using token_type = typename lexer_base<BasicJsonType>::token_type;
 
-    explicit lexer(InputAdapterType&& adapter)
-        : ia(std::move(adapter)), decimal_point_char(static_cast<char_int_type>(get_decimal_point())) {}
+    explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false)
+        : ia(std::move(adapter))
+        , ignore_comments(ignore_comments_)
+        , decimal_point_char(static_cast<char_int_type>(get_decimal_point()))
+    {}
 
     // delete because of pointer members
     lexer(const lexer&) = delete;
@@ -8198,7 +8201,7 @@ class lexer : public lexer_base<BasicJsonType>
     JSON_HEDLEY_PURE
     static char get_decimal_point() noexcept
     {
-        const auto loc = localeconv();
+        const auto* loc = localeconv();
         assert(loc != nullptr);
         return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point);
     }
@@ -8893,6 +8896,76 @@ class lexer : public lexer_base<BasicJsonType>
         }
     }
 
+    /*!
+     * @brief scan a comment
+     *
+     * Expects that the leading '/' has already been read. Consumes a
+     * single-line comment (up to a newline or the end of input) or a
+     * multi-line comment (up to and including the closing asterisk-slash).
+     *
+     * @return whether comment could be scanned successfully
+     */
+    bool scan_comment()
+    {
+        // remember character after '/' to distinguish comment types
+        const auto comment_char = get();
+
+        // expect // or /* to start a comment
+        if (comment_char != '/' and comment_char != '*')
+        {
+            return false;
+        }
+
+        while (true)
+        {
+            switch (get())
+            {
+                // EOF inside a /* comment is an error, in // it is OK
+                case std::char_traits<char_type>::eof():
+                case '\0':
+                {
+                    return comment_char == '/';
+                }
+
+                // a newline ends the // comment
+                case '\n':
+                case '\r':
+                {
+                    if (comment_char == '/')
+                    {
+                        return true;
+                    }
+                    break;
+                }
+
+                // */ ends the /* comment
+                case '*':
+                {
+                    if (comment_char == '*')
+                    {
+                        switch (get())
+                        {
+                            case '/':
+                            {
+                                return true;
+                            }
+
+                            default:
+                            {
+                                unget();
+                                break;
+                            }
+                        }
+                    }
+                    break;
+                }
+
+                default:
+                    break;
+            }
+        }
+    }
+
     JSON_HEDLEY_NON_NULL(2)
     static void strtof(float& f, const char* str, char** endptr) noexcept
     {
@@ -9498,6 +9571,24 @@ scan_number_done:
         }
         while (current == ' ' or current == '\t' or current == '\n' or current == '\r');
 
+        // ignore comments; a comment may be followed by whitespace and
+        // further comments, so repeat until a non-comment character is read
+        while (ignore_comments and current == '/')
+        {
+            if (not scan_comment())
+            {
+                error_message = "invalid comment";
+                return token_type::parse_error;
+            }
+
+            // skip whitespace following the comment
+            do
+            {
+                get();
+            }
+            while (current == ' ' or current == '\t' or current == '\n' or current == '\r');
+        }
+
         switch (current)
         {
             // structural characters
@@ -9566,6 +9657,9 @@ scan_number_done:
     /// input adapter
     InputAdapterType ia;
 
+    /// whether comments should be ignored (true) or signaled as errors (false)
+    const bool ignore_comments = false;
+
     /// the current character
     char_int_type current = std::char_traits<char_type>::eof();
 
diff --git a/test/src/unit-class_lexer.cpp b/test/src/unit-class_lexer.cpp
index 0e1b5378..c4423e60 100644
--- a/test/src/unit-class_lexer.cpp
+++ b/test/src/unit-class_lexer.cpp
@@ -127,6 +127,8 @@ TEST_CASE("lexer class")
             // store scan() result
             const auto res = scan_string(s.c_str());
 
+            CAPTURE(s);
+
             switch (c)
             {
                 // single characters that are valid tokens
@@ -161,6 +163,9 @@ TEST_CASE("lexer class")
                     break;
                 }
 
+                // case ('/'):
+                //     break;
+
                 // anything else is not expected
                 default:
                 {
@@ -179,4 +184,19 @@ TEST_CASE("lexer class")
         s += "\"";
         CHECK((scan_string(s.c_str()) == json::lexer::token_type::value_string));
     }
+
+    // SECTION("ignore comments")
+    // {
+    //     CHECK((scan_string("/") == json::lexer::token_type::parse_error));
+    //
+    //     CHECK((scan_string("/!") == json::lexer::token_type::parse_error));
+    //     CHECK((scan_string("/*") == json::lexer::token_type::parse_error));
+    //     CHECK((scan_string("/**") == json::lexer::token_type::parse_error));
+    //
+    //     CHECK((scan_string("//") == json::lexer::token_type::end_of_input));
+    //     CHECK((scan_string("/**/") == json::lexer::token_type::end_of_input));
+    //     CHECK((scan_string("/** /") == json::lexer::token_type::parse_error));
+    //
+    //     CHECK((scan_string("/***/") == json::lexer::token_type::end_of_input));
+    // }
 }