🔨 small parser performance improvements

- replaced list of pairs by flat list in next_byte_in_range - implemented early exit in case of parse errors - reused memory for object keys - direct calls to embedded objects/arrays for insertions
2017-07-30 13:25:36 +02:00 · 2017-07-30 13:25:36 +02:00 · 7737a29518
commit 7737a29518
parent c819a2d732
1 changed files with 41 additions and 27 deletions
--- a/src/json.hpp
+++ b/src/json.hpp
@ -1675,14 +1675,15 @@ class lexer

    @return true iff no range violation was detected
    */
-    bool next_byte_in_range(std::initializer_list<std::pair<int, int>> ranges)
+    bool next_byte_in_range(std::initializer_list<int> ranges)
    {
+        assert(ranges.size() == 2 or ranges.size() == 4 or ranges.size() == 6);
        add(current);

-        for (const auto& range : ranges)
+        for (auto range = ranges.begin(); range != ranges.end(); ++range)
        {
            get();
-            if (JSON_LIKELY(range.first <= current and current <= range.second))
+            if (JSON_LIKELY(*range <= current and current <= *(++range)))
            {
                add(current);
            }
@ -2053,7 +2054,7 @@ class lexer
                case 0xde:
                case 0xdf:
                {
-                    if (JSON_UNLIKELY(not next_byte_in_range({{0x80, 0xBF}})))
+                    if (JSON_UNLIKELY(not next_byte_in_range({0x80, 0xBF})))
                    {
                        return token_type::parse_error;
                    }
@ -2063,7 +2064,7 @@ class lexer
                // U+0800..U+0FFF: bytes E0 A0..BF 80..BF
                case 0xe0:
                {
-                    if (JSON_UNLIKELY(not (next_byte_in_range({{0xA0, 0xBF}, {0x80, 0xBF}}))))
+                    if (JSON_UNLIKELY(not (next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF}))))
                    {
                        return token_type::parse_error;
                    }
@ -2087,7 +2088,7 @@ class lexer
                case 0xee:
                case 0xef:
                {
-                    if (JSON_UNLIKELY(not (next_byte_in_range({{0x80, 0xBF}, {0x80, 0xBF}}))))
+                    if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF}))))
                    {
                        return token_type::parse_error;
                    }
@ -2097,7 +2098,7 @@ class lexer
                // U+D000..U+D7FF: bytes ED 80..9F 80..BF
                case 0xed:
                {
-                    if (JSON_UNLIKELY(not (next_byte_in_range({{0x80, 0x9F}, {0x80, 0xBF}}))))
+                    if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0x9F, 0x80, 0xBF}))))
                    {
                        return token_type::parse_error;
                    }
@ -2107,7 +2108,7 @@ class lexer
                // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
                case 0xf0:
                {
-                    if (JSON_UNLIKELY(not (next_byte_in_range({{0x90, 0xBF}, {0x80, 0xBF}, {0x80, 0xBF}}))))
+                    if (JSON_UNLIKELY(not (next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
                    {
                        return token_type::parse_error;
                    }
@ -2119,7 +2120,7 @@ class lexer
                case 0xf2:
                case 0xf3:
                {
-                    if (JSON_UNLIKELY(not (next_byte_in_range({{0x80, 0xBF}, {0x80, 0xBF}, {0x80, 0xBF}}))))
+                    if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
                    {
                        return token_type::parse_error;
                    }
@ -2129,7 +2130,7 @@ class lexer
                // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
                case 0xf4:
                {
-                    if (JSON_UNLIKELY(not (next_byte_in_range({{0x80, 0x8F}, {0x80, 0xBF}, {0x80, 0xBF}}))))
+                    if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF}))))
                    {
                        return token_type::parse_error;
                    }
@ -2166,12 +2167,11 @@ class lexer

    This function scans a string according to Sect. 6 of RFC 7159.

-    The function is realized with a deterministic finite state machine
-    derived from the grammar described in RFC 7159. Starting in state
-    "init", the input is read and used to determined the next state. Only
-    state "done" accepts the number. State "error" is a trap state to model
-    errors. In the table below, "anything" means any character but the ones
-    listed before.
+    The function is realized with a deterministic finite state machine derived
+    from the grammar described in RFC 7159. Starting in state "init", the
+    input is read and used to determined the next state. Only state "done"
+    accepts the number. State "error" is a trap state to model errors. In the
+    table below, "anything" means any character but the ones listed before.

    state    | 0        | 1-9      | e E      | +       | -       | .        | anything
    ---------|----------|----------|----------|---------|---------|----------|-----------
@ -2486,8 +2486,8 @@ scan_number_any2:
        }

 scan_number_done:
-        // unget the character after the number (we only read it to know that we
-        // are done scanning a number)
+        // unget the character after the number (we only read it to know that
+        // we are done scanning a number)
        --chars_read;
        next_unget = true;

@ -2886,6 +2886,9 @@ class parser
    */
    void parse_internal(bool keep, BasicJsonType& result)
    {
+        // never parse after a parse error was detected
+        assert(not errored);
+
        // start with a discarded value
        if (not result.is_discarded())
        {
@ -2919,6 +2922,7 @@ class parser
                }

                // parse values
+                std::string key;
                BasicJsonType value;
                while (true)
                {
@ -2927,7 +2931,7 @@ class parser
                    {
                        return;
                    }
-                    const auto key = m_lexer.get_string();
+                    key = m_lexer.get_string();

                    bool keep_tag = false;
                    if (keep)
@ -2955,9 +2959,15 @@ class parser
                    value.m_value.destroy(value.m_type);
                    value.m_type = value_t::discarded;
                    parse_internal(keep, value);
+
+                    if (JSON_UNLIKELY(errored))
+                    {
+                        return;
+                    }
+
                    if (keep and keep_tag and not value.is_discarded())
                    {
-                        result[key] = std::move(value);
+                        result.m_value.object->operator[](std::move(key)) = std::move(value);
                    }

                    // comma -> next value
@ -3015,9 +3025,15 @@ class parser
                    value.m_value.destroy(value.m_type);
                    value.m_type = value_t::discarded;
                    parse_internal(keep, value);
+
+                    if (JSON_UNLIKELY(errored))
+                    {
+                        return;
+                    }
+
                    if (keep and not value.is_discarded())
                    {
-                        result.push_back(std::move(value));
+                        result.m_value.array->push_back(std::move(value));
                    }

                    // comma -> next value
@ -4405,8 +4421,7 @@ class binary_reader

    @param[in] adapter  input adapter to read from
    */
-    explicit binary_reader(input_adapter_t adapter)
-        : ia(adapter), is_little_endian(little_endianess())
+    explicit binary_reader(input_adapter_t adapter) : ia(adapter)
    {
        assert(ia);
    }
@ -5526,7 +5541,7 @@ class binary_reader
    std::size_t chars_read = 0;

    /// whether we can assume little endianess
-    const bool is_little_endian = true;
+    const bool is_little_endian = little_endianess();
 };

 /*!
@ -5541,8 +5556,7 @@ class binary_writer

    @param[in] adapter  output adapter to write to
    */
-    explicit binary_writer(output_adapter_t<CharType> adapter)
-        : is_little_endian(binary_reader<BasicJsonType>::little_endianess()), oa(adapter)
+    explicit binary_writer(output_adapter_t<CharType> adapter) : oa(adapter)
    {
        assert(oa);
    }
@ -6067,7 +6081,7 @@ class binary_writer

  private:
    /// whether we can assume little endianess
-    const bool is_little_endian = true;
+    const bool is_little_endian = binary_reader<BasicJsonType>::little_endianess();

    /// the output
    output_adapter_t<CharType> oa = nullptr;