✨ ignore comments

2020-06-16 12:28:59 +02:00 · 2020-06-16 12:28:59 +02:00 · b53c6e2f81
commit b53c6e2f81
parent e7452d8778
3 changed files with 190 additions and 6 deletions
--- a/include/nlohmann/detail/input/lexer.hpp
+++ b/include/nlohmann/detail/input/lexer.hpp
@ -112,8 +112,11 @@ class lexer : public lexer_base<BasicJsonType>
  public:
    using token_type = typename lexer_base<BasicJsonType>::token_type;

-    explicit lexer(InputAdapterType&& adapter)
-        : ia(std::move(adapter)), decimal_point_char(static_cast<char_int_type>(get_decimal_point())) {}
+    explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false)
+        : ia(std::move(adapter))
+        , ignore_comments(ignore_comments_)
+        , decimal_point_char(static_cast<char_int_type>(get_decimal_point()))
+    {}

    // delete because of pointer members
    lexer(const lexer&) = delete;
@ -131,7 +134,7 @@ class lexer : public lexer_base<BasicJsonType>
    JSON_HEDLEY_PURE
    static char get_decimal_point() noexcept
    {
-        const auto loc = localeconv();
+        const auto* loc = localeconv();
        assert(loc != nullptr);
        return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point);
    }
@ -826,6 +829,71 @@ class lexer : public lexer_base<BasicJsonType>
        }
    }

+    /*!
+     * @brief scan a comment
+     * @return whether comment could be scanned successfully
+     */
+    bool scan_comment()
+    {
+        // remember character after '/' to distinguish comment types
+        const auto comment_char = get();
+
+        // expect // or /* to start a comment
+        if (comment_char != '/' and comment_char != '*')
+        {
+            return false;
+        }
+
+        while (true)
+        {
+            switch (get())
+            {
+                // EOF inside a /* comment is an error, in // it is OK
+                case std::char_traits<char_type>::eof():
+                case '\0':
+                {
+                    return comment_char == '/';
+                }
+
+                // a newline ends the // comment
+                case '\n':
+                case '\r':
+                {
+                    if (comment_char == '/')
+                    {
+                        return true;
+                    }
+                    break;
+                }
+
+                // */ ends the /* comment
+                case '*':
+                {
+                    if (comment_char == '*')
+                    {
+                        switch (get())
+                        {
+                            case '/':
+                            {
+                                return true;
+                            }
+
+                            default:
+                            {
+                                unget();
+                                break;
+                            }
+                        }
+                    }
+                    break;
+                }
+
+                default:
+                    break;
+            }
+        }
+    }
+
    JSON_HEDLEY_NON_NULL(2)
    static void strtof(float& f, const char* str, char** endptr) noexcept
    {
@ -1431,6 +1499,17 @@ scan_number_done:
        }
        while (current == ' ' or current == '\t' or current == '\n' or current == '\r');

+        // ignore comments
+        if (ignore_comments and current == '/')
+        {
+            if (not scan_comment())
+            {
+                error_message = "invalid comment";
+                return token_type::parse_error;
+            }
+            get();
+        }
+
        switch (current)
        {
            // structural characters
@ -1499,6 +1578,9 @@ scan_number_done:
    /// input adapter
    InputAdapterType ia;

+    /// whether comments should be ignored (true) or signaled as errors (false)
+    const bool ignore_comments = false;
+
    /// the current character
    char_int_type current = std::char_traits<char_type>::eof();

--- a/single_include/nlohmann/json.hpp
+++ b/single_include/nlohmann/json.hpp
@ -8179,8 +8179,11 @@ class lexer : public lexer_base<BasicJsonType>
  public:
    using token_type = typename lexer_base<BasicJsonType>::token_type;

-    explicit lexer(InputAdapterType&& adapter)
-        : ia(std::move(adapter)), decimal_point_char(static_cast<char_int_type>(get_decimal_point())) {}
+    explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false)
+        : ia(std::move(adapter))
+        , ignore_comments(ignore_comments_)
+        , decimal_point_char(static_cast<char_int_type>(get_decimal_point()))
+    {}

    // delete because of pointer members
    lexer(const lexer&) = delete;
@ -8198,7 +8201,7 @@ class lexer : public lexer_base<BasicJsonType>
    JSON_HEDLEY_PURE
    static char get_decimal_point() noexcept
    {
-        const auto loc = localeconv();
+        const auto* loc = localeconv();
        assert(loc != nullptr);
        return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point);
    }
@ -8893,6 +8896,71 @@ class lexer : public lexer_base<BasicJsonType>
        }
    }

+    /*!
+     * @brief scan a comment
+     * @return whether comment could be scanned successfully
+     */
+    bool scan_comment()
+    {
+        // remember character after '/' to distinguish comment types
+        const auto comment_char = get();
+
+        // expect // or /* to start a comment
+        if (comment_char != '/' and comment_char != '*')
+        {
+            return false;
+        }
+
+        while (true)
+        {
+            switch (get())
+            {
+                // EOF inside a /* comment is an error, in // it is OK
+                case std::char_traits<char_type>::eof():
+                case '\0':
+                {
+                    return comment_char == '/';
+                }
+
+                // a newline ends the // comment
+                case '\n':
+                case '\r':
+                {
+                    if (comment_char == '/')
+                    {
+                        return true;
+                    }
+                    break;
+                }
+
+                // */ ends the /* comment
+                case '*':
+                {
+                    if (comment_char == '*')
+                    {
+                        switch (get())
+                        {
+                            case '/':
+                            {
+                                return true;
+                            }
+
+                            default:
+                            {
+                                unget();
+                                break;
+                            }
+                        }
+                    }
+                    break;
+                }
+
+                default:
+                    break;
+            }
+        }
+    }
+
    JSON_HEDLEY_NON_NULL(2)
    static void strtof(float& f, const char* str, char** endptr) noexcept
    {
@ -9498,6 +9566,17 @@ scan_number_done:
        }
        while (current == ' ' or current == '\t' or current == '\n' or current == '\r');

+        // ignore comments
+        if (ignore_comments and current == '/')
+        {
+            if (not scan_comment())
+            {
+                error_message = "invalid comment";
+                return token_type::parse_error;
+            }
+            get();
+        }
+
        switch (current)
        {
            // structural characters
@ -9566,6 +9645,9 @@ scan_number_done:
    /// input adapter
    InputAdapterType ia;

+    /// whether comments should be ignored (true) or signaled as errors (false)
+    const bool ignore_comments = false;
+
    /// the current character
    char_int_type current = std::char_traits<char_type>::eof();

--- a/test/src/unit-class_lexer.cpp
+++ b/test/src/unit-class_lexer.cpp
@ -127,6 +127,8 @@ TEST_CASE("lexer class")
            // store scan() result
            const auto res = scan_string(s.c_str());

+            CAPTURE(s);
+
            switch (c)
            {
                // single characters that are valid tokens
@ -161,6 +163,9 @@ TEST_CASE("lexer class")
                    break;
                }

+                //                case ('/'):
+                //                    break;
+
                // anything else is not expected
                default:
                {
@ -179,4 +184,19 @@ TEST_CASE("lexer class")
        s += "\"";
        CHECK((scan_string(s.c_str()) == json::lexer::token_type::value_string));
    }
+
+    //    SECTION("ignore comments")
+    //    {
+    //        CHECK((scan_string("/") == json::lexer::token_type::parse_error));
+    //
+    //        CHECK((scan_string("/!") == json::lexer::token_type::parse_error));
+    //        CHECK((scan_string("/*") == json::lexer::token_type::parse_error));
+    //        CHECK((scan_string("/**") == json::lexer::token_type::parse_error));
+    //
+    //        CHECK((scan_string("//") == json::lexer::token_type::end_of_input));
+    //        CHECK((scan_string("/**/") == json::lexer::token_type::end_of_input));
+    //        CHECK((scan_string("/** /") == json::lexer::token_type::parse_error));
+    //
+    //        CHECK((scan_string("/***/") == json::lexer::token_type::end_of_input));
+    //    }
 }