Merge branch 'develop' of https://github.com/nlohmann/json into clang_windows

Conflicts: include/nlohmann/detail/input/binary_reader.hpp include/nlohmann/detail/input/input_adapters.hpp include/nlohmann/detail/input/lexer.hpp include/nlohmann/detail/output/binary_writer.hpp include/nlohmann/json.hpp single_include/nlohmann/json.hpp
2020-06-27 13:14:48 +02:00 · 2020-06-27 13:14:48 +02:00 · ac3922c7aa
commit ac3922c7aa
parent 24e8562664 5ba0f65c34
128 changed files with 7773 additions and 2268 deletions
--- a/include/nlohmann/detail/input/lexer.hpp
+++ b/include/nlohmann/detail/input/lexer.hpp
@ -106,12 +106,17 @@ class lexer : public lexer_base<BasicJsonType>
    using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
    using number_float_t = typename BasicJsonType::number_float_t;
    using string_t = typename BasicJsonType::string_t;
+    using char_type = typename InputAdapterType::char_type;
+    using char_int_type = typename std::char_traits<char_type>::int_type;

  public:
    using token_type = typename lexer_base<BasicJsonType>::token_type;

-    explicit lexer(InputAdapterType&& adapter)
-        : ia(std::move(adapter)), decimal_point_char(get_decimal_point()) {}
+    explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false)  // comment skipping is opt-in; it is off by default
+        : ia(std::move(adapter))  // the adapter is moved into the member `ia`
+        , ignore_comments(ignore_comments_)
+        , decimal_point_char(static_cast<char_int_type>(get_decimal_point()))  // looked up once at construction and stored as char_int_type
+    {}

    // delete because of pointer members
    lexer(const lexer&) = delete;
@ -129,7 +134,7 @@ class lexer : public lexer_base<BasicJsonType>
    JSON_HEDLEY_PURE
    static char get_decimal_point() noexcept
    {
        // query the numeric formatting information of the current C locale
-        const auto loc = localeconv();
+        const auto* loc = localeconv();
        assert(loc != nullptr);
        // only the first character of the locale's decimal_point string is
        // returned; '.' is the fallback when that string pointer is null
        return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point);
    }
@ -201,7 +206,7 @@ class lexer : public lexer_base<BasicJsonType>

    @return true if and only if no range violation was detected
    */
-    bool next_byte_in_range(std::initializer_list<int> ranges)
+    bool next_byte_in_range(std::initializer_list<char_int_type> ranges)
    {
        assert(ranges.size() == 2 || ranges.size() == 4 || ranges.size() == 6);
        add(current);
@ -252,7 +257,7 @@ class lexer : public lexer_base<BasicJsonType>
            switch (get())
            {
                // end of file while parsing string
-                case std::char_traits<char>::eof():
+                case std::char_traits<char_type>::eof():
                {
                    error_message = "invalid string: missing closing quote";
                    return token_type::parse_error;
@ -370,28 +375,28 @@ class lexer : public lexer_base<BasicJsonType>
                            if (codepoint < 0x80)
                            {
                                // 1-byte characters: 0xxxxxxx (ASCII)
-                                add(codepoint);
+                                add(static_cast<char_int_type>(codepoint));
                            }
                            else if (codepoint <= 0x7FF)
                            {
                                // 2-byte characters: 110xxxxx 10xxxxxx
-                                add(static_cast<int>(0xC0u | (static_cast<unsigned int>(codepoint) >> 6u)));
-                                add(static_cast<int>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
+                                add(static_cast<char_int_type>(0xC0u | (static_cast<unsigned int>(codepoint) >> 6u)));
+                                add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
                            }
                            else if (codepoint <= 0xFFFF)
                            {
                                // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx
-                                add(static_cast<int>(0xE0u | (static_cast<unsigned int>(codepoint) >> 12u)));
-                                add(static_cast<int>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
-                                add(static_cast<int>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
+                                add(static_cast<char_int_type>(0xE0u | (static_cast<unsigned int>(codepoint) >> 12u)));
+                                add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
+                                add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
                            }
                            else
                            {
                                // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-                                add(static_cast<int>(0xF0u | (static_cast<unsigned int>(codepoint) >> 18u)));
-                                add(static_cast<int>(0x80u | ((static_cast<unsigned int>(codepoint) >> 12u) & 0x3Fu)));
-                                add(static_cast<int>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
-                                add(static_cast<int>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
+                                add(static_cast<char_int_type>(0xF0u | (static_cast<unsigned int>(codepoint) >> 18u)));
+                                add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 12u) & 0x3Fu)));
+                                add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
+                                add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
                            }

                            break;
@ -824,6 +829,77 @@ class lexer : public lexer_base<BasicJsonType>
        }
    }

+    /*!
+     * @brief scan a comment
+     *
+     * The first character read here selects the comment style; the error
+     * message of the fallback case shows that the opening '/' is expected to
+     * have been read before this function is entered.
+     * - '/' starts a single-line comment, which ends at the first '\n',
+     *   '\r', end of input, or NUL character
+     * - '*' starts a multi-line comment, which ends at the first '*' that is
+     *   directly followed by '/'
+     * - any other character is an error
+     *
+     * @return whether comment could be scanned successfully; when false is
+     *         returned, error_message has been set to describe the problem
+     */
+    bool scan_comment()
+    {
+        switch (get())
+        {
+            // single-line comments skip input until a newline, carriage return, EOF, or NUL is read
+            case '/':
+            {
+                while (true)
+                {
+                    switch (get())
+                    {
+                        case '\n':
+                        case '\r':
+                        case std::char_traits<char_type>::eof():
+                        case '\0':
+                            return true;  // an unterminated single-line comment is not an error
+
+                        default:
+                            break;  // ordinary comment character: keep reading
+                    }
+                }
+            }
+
+            // multi-line comments skip input until */ is read
+            case '*':
+            {
+                while (true)
+                {
+                    switch (get())
+                    {
+                        case std::char_traits<char_type>::eof():
+                        case '\0':
+                        {
+                            error_message = "invalid comment; missing closing '*/'";
+                            return false;
+                        }
+
+                        case '*':
+                        {
+                            switch (get())
+                            {
+                                case '/':
+                                    return true;
+
+                                default:
+                                {
+                                    unget();  // not a closing sequence: push the character back so the loop examines it again (handles "**/" and EOF after '*')
+                                    break;  // leaves the inner switch only; control then falls into the outer default below
+                                }
+                            }
+                        }
+
+                        default:
+                            break;  // leaves the switch, not the loop: continue with the next character
+                    }
+                }
+            }
+
+            // unexpected character after reading '/'
+            default:
+            {
+                error_message = "invalid comment; expecting '/' or '*' after '/'";
+                return false;
+            }
+        }
+    }
+
    JSON_HEDLEY_NON_NULL(2)
    static void strtof(float& f, const char* str, char** endptr) noexcept
    {
@ -1213,13 +1289,13 @@ scan_number_done:
    @param[in] return_type   the token type to return on success
    */
    JSON_HEDLEY_NON_NULL(2)
-    token_type scan_literal(const char* literal_text, const std::size_t length,
+    token_type scan_literal(const char_type* literal_text, const std::size_t length,
                            token_type return_type)
    {
-        assert(current == literal_text[0]);
+        assert(std::char_traits<char_type>::to_char_type(current) == literal_text[0]);
        for (std::size_t i = 1; i < length; ++i)
        {
-            if (JSON_HEDLEY_UNLIKELY(get() != literal_text[i]))
+            if (JSON_HEDLEY_UNLIKELY(std::char_traits<char_type>::to_char_type(get()) != literal_text[i]))
            {
                error_message = "invalid literal";
                return token_type::parse_error;
@ -1237,7 +1313,7 @@ scan_number_done:
    {
        token_buffer.clear();
        token_string.clear();
-        token_string.push_back(std::char_traits<char>::to_char_type(current));
+        token_string.push_back(std::char_traits<char_type>::to_char_type(current));
    }

    /*
@ -1250,7 +1326,7 @@ scan_number_done:

    @return character read from the input
    */
-    std::char_traits<char>::int_type get()
+    char_int_type get()
    {
        ++position.chars_read_total;
        ++position.chars_read_current_line;
@ -1265,9 +1341,9 @@ scan_number_done:
            current = ia.get_character();
        }

-        if (JSON_HEDLEY_LIKELY(current != std::char_traits<char>::eof()))
+        if (JSON_HEDLEY_LIKELY(current != std::char_traits<char_type>::eof()))
        {
-            token_string.push_back(std::char_traits<char>::to_char_type(current));
+            token_string.push_back(std::char_traits<char_type>::to_char_type(current));
        }

        if (current == '\n')
@ -1306,7 +1382,7 @@ scan_number_done:
            --position.chars_read_current_line;
        }

-        if (JSON_HEDLEY_LIKELY(current != std::char_traits<char>::eof()))
+        if (JSON_HEDLEY_LIKELY(current != std::char_traits<char_type>::eof()))
        {
            assert(!token_string.empty());
            token_string.pop_back();
@ -1314,9 +1390,9 @@ scan_number_done:
    }

    /// add a character to token_buffer
    /// @param[in] c  character in integer form; it is converted to the
    ///               character type of the string buffer before being appended
-    void add(int c)
+    void add(char_int_type c)
    {
-        token_buffer.push_back(std::char_traits<char>::to_char_type(c));
+        token_buffer.push_back(static_cast<typename string_t::value_type>(c));
    }

  public:
@ -1377,7 +1453,7 @@ scan_number_done:
            else
            {
                // add character as is
-                result.push_back(c);
+                result.push_back(static_cast<std::string::value_type>(c));
            }
        }

@ -1413,6 +1489,15 @@ scan_number_done:
        return true;
    }

+    void skip_whitespace()  // always reads at least one character, then keeps reading while it is whitespace
+    {
+        do
+        {
+            get();  // the result is not needed here; the loop condition tests the member `current`
+        }
+        while (current == ' ' or current == '\t' or current == '\n' or current == '\r');  // on exit, `current` is the first character that is not space, tab, LF, or CR
+    }
+
    token_type scan()
    {
        // initially, skip the BOM
@ -1423,9 +1508,18 @@ scan_number_done:
        }

        // read next character and ignore whitespace
-        do
+        skip_whitespace();
+
+        // ignore comments
+        if (ignore_comments and current == '/')
        {
-            get();
+            if (not scan_comment())
+            {
+                return token_type::parse_error;
+            }
+
+            // skip following whitespace
+            skip_whitespace();
        }
        while (current == ' ' || current == '\t' || current == '\n' || current == '\r');

@ -1447,11 +1541,20 @@ scan_number_done:

            // literals
            case 't':
-                return scan_literal("true", 4, token_type::literal_true);
+            {
+                std::array<char_type, 4> true_literal = {{'t', 'r', 'u', 'e'}};
+                return scan_literal(true_literal.data(), true_literal.size(), token_type::literal_true);
+            }
            case 'f':
-                return scan_literal("false", 5, token_type::literal_false);
+            {
+                std::array<char_type, 5> false_literal = {{'f', 'a', 'l', 's', 'e'}};
+                return scan_literal(false_literal.data(), false_literal.size(), token_type::literal_false);
+            }
            case 'n':
-                return scan_literal("null", 4, token_type::literal_null);
+            {
+                std::array<char_type, 4> null_literal = {{'n', 'u', 'l', 'l'}};
+                return scan_literal(null_literal.data(), null_literal.size(), token_type::literal_null);
+            }

            // string
            case '\"':
@ -1474,7 +1577,7 @@ scan_number_done:
            // end of input (the null byte is needed when parsing from
            // string literals)
            case '\0':
-            case std::char_traits<char>::eof():
+            case std::char_traits<char_type>::eof():
                return token_type::end_of_input;

            // error
@ -1488,8 +1591,11 @@ scan_number_done:
    /// input adapter
    InputAdapterType ia;

+    /// whether comments should be ignored (true) or signaled as errors (false)
+    const bool ignore_comments = false;
+
    /// the current character
-    std::char_traits<char>::int_type current = std::char_traits<char>::eof();
+    char_int_type current = std::char_traits<char_type>::eof();

    /// whether the next get() call should just return current
    bool next_unget = false;
@ -1498,7 +1604,7 @@ scan_number_done:
    position_t position {};

    /// raw input token string (for error messages)
-    std::vector<char> token_string {};
+    std::vector<char_type> token_string {};

    /// buffer for variable-length tokens (numbers, strings)
    string_t token_buffer {};
@ -1512,7 +1618,7 @@ scan_number_done:
    number_float_t value_float = 0;

    /// the decimal point
-    const char decimal_point_char = '.';
+    const char_int_type decimal_point_char = '.';
 };
 }  // namespace detail
 }  // namespace nlohmann