🔨 small parser performance improvements

- replaced list of pairs by flat list in next_byte_in_range
- implemented early exit in case of parse errors
- reused memory for object keys
- direct calls to embedded objects/arrays for insertions
This commit is contained in:
Niels Lohmann 2017-07-30 13:25:36 +02:00
parent c819a2d732
commit 7737a29518
No known key found for this signature in database
GPG key ID: 7F3CEA63AE251B69

View file

@ -1675,14 +1675,15 @@ class lexer
@return true iff no range violation was detected @return true iff no range violation was detected
*/ */
bool next_byte_in_range(std::initializer_list<std::pair<int, int>> ranges) bool next_byte_in_range(std::initializer_list<int> ranges)
{ {
assert(ranges.size() == 2 or ranges.size() == 4 or ranges.size() == 6);
add(current); add(current);
for (const auto& range : ranges) for (auto range = ranges.begin(); range != ranges.end(); ++range)
{ {
get(); get();
if (JSON_LIKELY(range.first <= current and current <= range.second)) if (JSON_LIKELY(*range <= current and current <= *(++range)))
{ {
add(current); add(current);
} }
@ -2053,7 +2054,7 @@ class lexer
case 0xde: case 0xde:
case 0xdf: case 0xdf:
{ {
if (JSON_UNLIKELY(not next_byte_in_range({{0x80, 0xBF}}))) if (JSON_UNLIKELY(not next_byte_in_range({0x80, 0xBF})))
{ {
return token_type::parse_error; return token_type::parse_error;
} }
@ -2063,7 +2064,7 @@ class lexer
// U+0800..U+0FFF: bytes E0 A0..BF 80..BF // U+0800..U+0FFF: bytes E0 A0..BF 80..BF
case 0xe0: case 0xe0:
{ {
if (JSON_UNLIKELY(not (next_byte_in_range({{0xA0, 0xBF}, {0x80, 0xBF}})))) if (JSON_UNLIKELY(not (next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF}))))
{ {
return token_type::parse_error; return token_type::parse_error;
} }
@ -2087,7 +2088,7 @@ class lexer
case 0xee: case 0xee:
case 0xef: case 0xef:
{ {
if (JSON_UNLIKELY(not (next_byte_in_range({{0x80, 0xBF}, {0x80, 0xBF}})))) if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF}))))
{ {
return token_type::parse_error; return token_type::parse_error;
} }
@ -2097,7 +2098,7 @@ class lexer
// U+D000..U+D7FF: bytes ED 80..9F 80..BF // U+D000..U+D7FF: bytes ED 80..9F 80..BF
case 0xed: case 0xed:
{ {
if (JSON_UNLIKELY(not (next_byte_in_range({{0x80, 0x9F}, {0x80, 0xBF}})))) if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0x9F, 0x80, 0xBF}))))
{ {
return token_type::parse_error; return token_type::parse_error;
} }
@ -2107,7 +2108,7 @@ class lexer
// U+10000..U+3FFFF F0 90..BF 80..BF 80..BF // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
case 0xf0: case 0xf0:
{ {
if (JSON_UNLIKELY(not (next_byte_in_range({{0x90, 0xBF}, {0x80, 0xBF}, {0x80, 0xBF}})))) if (JSON_UNLIKELY(not (next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
{ {
return token_type::parse_error; return token_type::parse_error;
} }
@ -2119,7 +2120,7 @@ class lexer
case 0xf2: case 0xf2:
case 0xf3: case 0xf3:
{ {
if (JSON_UNLIKELY(not (next_byte_in_range({{0x80, 0xBF}, {0x80, 0xBF}, {0x80, 0xBF}})))) if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
{ {
return token_type::parse_error; return token_type::parse_error;
} }
@ -2129,7 +2130,7 @@ class lexer
// U+100000..U+10FFFF F4 80..8F 80..BF 80..BF // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
case 0xf4: case 0xf4:
{ {
if (JSON_UNLIKELY(not (next_byte_in_range({{0x80, 0x8F}, {0x80, 0xBF}, {0x80, 0xBF}})))) if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF}))))
{ {
return token_type::parse_error; return token_type::parse_error;
} }
@ -2166,12 +2167,11 @@ class lexer
This function scans a string according to Sect. 6 of RFC 7159. This function scans a string according to Sect. 6 of RFC 7159.
The function is realized with a deterministic finite state machine The function is realized with a deterministic finite state machine derived
derived from the grammar described in RFC 7159. Starting in state from the grammar described in RFC 7159. Starting in state "init", the
"init", the input is read and used to determined the next state. Only input is read and used to determined the next state. Only state "done"
state "done" accepts the number. State "error" is a trap state to model accepts the number. State "error" is a trap state to model errors. In the
errors. In the table below, "anything" means any character but the ones table below, "anything" means any character but the ones listed before.
listed before.
state | 0 | 1-9 | e E | + | - | . | anything state | 0 | 1-9 | e E | + | - | . | anything
---------|----------|----------|----------|---------|---------|----------|----------- ---------|----------|----------|----------|---------|---------|----------|-----------
@ -2486,8 +2486,8 @@ scan_number_any2:
} }
scan_number_done: scan_number_done:
// unget the character after the number (we only read it to know that we // unget the character after the number (we only read it to know that
// are done scanning a number) // we are done scanning a number)
--chars_read; --chars_read;
next_unget = true; next_unget = true;
@ -2886,6 +2886,9 @@ class parser
*/ */
void parse_internal(bool keep, BasicJsonType& result) void parse_internal(bool keep, BasicJsonType& result)
{ {
// never parse after a parse error was detected
assert(not errored);
// start with a discarded value // start with a discarded value
if (not result.is_discarded()) if (not result.is_discarded())
{ {
@ -2919,6 +2922,7 @@ class parser
} }
// parse values // parse values
std::string key;
BasicJsonType value; BasicJsonType value;
while (true) while (true)
{ {
@ -2927,7 +2931,7 @@ class parser
{ {
return; return;
} }
const auto key = m_lexer.get_string(); key = m_lexer.get_string();
bool keep_tag = false; bool keep_tag = false;
if (keep) if (keep)
@ -2955,9 +2959,15 @@ class parser
value.m_value.destroy(value.m_type); value.m_value.destroy(value.m_type);
value.m_type = value_t::discarded; value.m_type = value_t::discarded;
parse_internal(keep, value); parse_internal(keep, value);
if (JSON_UNLIKELY(errored))
{
return;
}
if (keep and keep_tag and not value.is_discarded()) if (keep and keep_tag and not value.is_discarded())
{ {
result[key] = std::move(value); result.m_value.object->operator[](std::move(key)) = std::move(value);
} }
// comma -> next value // comma -> next value
@ -3015,9 +3025,15 @@ class parser
value.m_value.destroy(value.m_type); value.m_value.destroy(value.m_type);
value.m_type = value_t::discarded; value.m_type = value_t::discarded;
parse_internal(keep, value); parse_internal(keep, value);
if (JSON_UNLIKELY(errored))
{
return;
}
if (keep and not value.is_discarded()) if (keep and not value.is_discarded())
{ {
result.push_back(std::move(value)); result.m_value.array->push_back(std::move(value));
} }
// comma -> next value // comma -> next value
@ -4405,8 +4421,7 @@ class binary_reader
@param[in] adapter input adapter to read from @param[in] adapter input adapter to read from
*/ */
explicit binary_reader(input_adapter_t adapter) explicit binary_reader(input_adapter_t adapter) : ia(adapter)
: ia(adapter), is_little_endian(little_endianess())
{ {
assert(ia); assert(ia);
} }
@ -5526,7 +5541,7 @@ class binary_reader
std::size_t chars_read = 0; std::size_t chars_read = 0;
/// whether we can assume little endianness /// whether we can assume little endianness
const bool is_little_endian = true; const bool is_little_endian = little_endianess();
}; };
/*! /*!
@ -5541,8 +5556,7 @@ class binary_writer
@param[in] adapter output adapter to write to @param[in] adapter output adapter to write to
*/ */
explicit binary_writer(output_adapter_t<CharType> adapter) explicit binary_writer(output_adapter_t<CharType> adapter) : oa(adapter)
: is_little_endian(binary_reader<BasicJsonType>::little_endianess()), oa(adapter)
{ {
assert(oa); assert(oa);
} }
@ -6067,7 +6081,7 @@ class binary_writer
private: private:
/// whether we can assume little endianness /// whether we can assume little endianness
const bool is_little_endian = true; const bool is_little_endian = binary_reader<BasicJsonType>::little_endianess();
/// the output /// the output
output_adapter_t<CharType> oa = nullptr; output_adapter_t<CharType> oa = nullptr;