🔨 small parser performance improvements
- replaced list of pairs by flat list in next_byte_in_range - implemented early exit in case of parse errors - reused memory for object keys - direct calls to embedded objects/arrays for insertions
This commit is contained in:
parent
c819a2d732
commit
7737a29518
1 changed files with 41 additions and 27 deletions
68
src/json.hpp
68
src/json.hpp
|
@ -1675,14 +1675,15 @@ class lexer
|
||||||
|
|
||||||
@return true iff no range violation was detected
|
@return true iff no range violation was detected
|
||||||
*/
|
*/
|
||||||
bool next_byte_in_range(std::initializer_list<std::pair<int, int>> ranges)
|
bool next_byte_in_range(std::initializer_list<int> ranges)
|
||||||
{
|
{
|
||||||
|
assert(ranges.size() == 2 or ranges.size() == 4 or ranges.size() == 6);
|
||||||
add(current);
|
add(current);
|
||||||
|
|
||||||
for (const auto& range : ranges)
|
for (auto range = ranges.begin(); range != ranges.end(); ++range)
|
||||||
{
|
{
|
||||||
get();
|
get();
|
||||||
if (JSON_LIKELY(range.first <= current and current <= range.second))
|
if (JSON_LIKELY(*range <= current and current <= *(++range)))
|
||||||
{
|
{
|
||||||
add(current);
|
add(current);
|
||||||
}
|
}
|
||||||
|
@ -2053,7 +2054,7 @@ class lexer
|
||||||
case 0xde:
|
case 0xde:
|
||||||
case 0xdf:
|
case 0xdf:
|
||||||
{
|
{
|
||||||
if (JSON_UNLIKELY(not next_byte_in_range({{0x80, 0xBF}})))
|
if (JSON_UNLIKELY(not next_byte_in_range({0x80, 0xBF})))
|
||||||
{
|
{
|
||||||
return token_type::parse_error;
|
return token_type::parse_error;
|
||||||
}
|
}
|
||||||
|
@ -2063,7 +2064,7 @@ class lexer
|
||||||
// U+0800..U+0FFF: bytes E0 A0..BF 80..BF
|
// U+0800..U+0FFF: bytes E0 A0..BF 80..BF
|
||||||
case 0xe0:
|
case 0xe0:
|
||||||
{
|
{
|
||||||
if (JSON_UNLIKELY(not (next_byte_in_range({{0xA0, 0xBF}, {0x80, 0xBF}}))))
|
if (JSON_UNLIKELY(not (next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF}))))
|
||||||
{
|
{
|
||||||
return token_type::parse_error;
|
return token_type::parse_error;
|
||||||
}
|
}
|
||||||
|
@ -2087,7 +2088,7 @@ class lexer
|
||||||
case 0xee:
|
case 0xee:
|
||||||
case 0xef:
|
case 0xef:
|
||||||
{
|
{
|
||||||
if (JSON_UNLIKELY(not (next_byte_in_range({{0x80, 0xBF}, {0x80, 0xBF}}))))
|
if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF}))))
|
||||||
{
|
{
|
||||||
return token_type::parse_error;
|
return token_type::parse_error;
|
||||||
}
|
}
|
||||||
|
@ -2097,7 +2098,7 @@ class lexer
|
||||||
// U+D000..U+D7FF: bytes ED 80..9F 80..BF
|
// U+D000..U+D7FF: bytes ED 80..9F 80..BF
|
||||||
case 0xed:
|
case 0xed:
|
||||||
{
|
{
|
||||||
if (JSON_UNLIKELY(not (next_byte_in_range({{0x80, 0x9F}, {0x80, 0xBF}}))))
|
if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0x9F, 0x80, 0xBF}))))
|
||||||
{
|
{
|
||||||
return token_type::parse_error;
|
return token_type::parse_error;
|
||||||
}
|
}
|
||||||
|
@ -2107,7 +2108,7 @@ class lexer
|
||||||
// U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
|
// U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
|
||||||
case 0xf0:
|
case 0xf0:
|
||||||
{
|
{
|
||||||
if (JSON_UNLIKELY(not (next_byte_in_range({{0x90, 0xBF}, {0x80, 0xBF}, {0x80, 0xBF}}))))
|
if (JSON_UNLIKELY(not (next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
|
||||||
{
|
{
|
||||||
return token_type::parse_error;
|
return token_type::parse_error;
|
||||||
}
|
}
|
||||||
|
@ -2119,7 +2120,7 @@ class lexer
|
||||||
case 0xf2:
|
case 0xf2:
|
||||||
case 0xf3:
|
case 0xf3:
|
||||||
{
|
{
|
||||||
if (JSON_UNLIKELY(not (next_byte_in_range({{0x80, 0xBF}, {0x80, 0xBF}, {0x80, 0xBF}}))))
|
if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
|
||||||
{
|
{
|
||||||
return token_type::parse_error;
|
return token_type::parse_error;
|
||||||
}
|
}
|
||||||
|
@ -2129,7 +2130,7 @@ class lexer
|
||||||
// U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
|
// U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
|
||||||
case 0xf4:
|
case 0xf4:
|
||||||
{
|
{
|
||||||
if (JSON_UNLIKELY(not (next_byte_in_range({{0x80, 0x8F}, {0x80, 0xBF}, {0x80, 0xBF}}))))
|
if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF}))))
|
||||||
{
|
{
|
||||||
return token_type::parse_error;
|
return token_type::parse_error;
|
||||||
}
|
}
|
||||||
|
@ -2166,12 +2167,11 @@ class lexer
|
||||||
|
|
||||||
This function scans a string according to Sect. 6 of RFC 7159.
|
This function scans a string according to Sect. 6 of RFC 7159.
|
||||||
|
|
||||||
The function is realized with a deterministic finite state machine
|
The function is realized with a deterministic finite state machine derived
|
||||||
derived from the grammar described in RFC 7159. Starting in state
|
from the grammar described in RFC 7159. Starting in state "init", the
|
||||||
"init", the input is read and used to determined the next state. Only
|
input is read and used to determined the next state. Only state "done"
|
||||||
state "done" accepts the number. State "error" is a trap state to model
|
accepts the number. State "error" is a trap state to model errors. In the
|
||||||
errors. In the table below, "anything" means any character but the ones
|
table below, "anything" means any character but the ones listed before.
|
||||||
listed before.
|
|
||||||
|
|
||||||
state | 0 | 1-9 | e E | + | - | . | anything
|
state | 0 | 1-9 | e E | + | - | . | anything
|
||||||
---------|----------|----------|----------|---------|---------|----------|-----------
|
---------|----------|----------|----------|---------|---------|----------|-----------
|
||||||
|
@ -2486,8 +2486,8 @@ scan_number_any2:
|
||||||
}
|
}
|
||||||
|
|
||||||
scan_number_done:
|
scan_number_done:
|
||||||
// unget the character after the number (we only read it to know that we
|
// unget the character after the number (we only read it to know that
|
||||||
// are done scanning a number)
|
// we are done scanning a number)
|
||||||
--chars_read;
|
--chars_read;
|
||||||
next_unget = true;
|
next_unget = true;
|
||||||
|
|
||||||
|
@ -2886,6 +2886,9 @@ class parser
|
||||||
*/
|
*/
|
||||||
void parse_internal(bool keep, BasicJsonType& result)
|
void parse_internal(bool keep, BasicJsonType& result)
|
||||||
{
|
{
|
||||||
|
// never parse after a parse error was detected
|
||||||
|
assert(not errored);
|
||||||
|
|
||||||
// start with a discarded value
|
// start with a discarded value
|
||||||
if (not result.is_discarded())
|
if (not result.is_discarded())
|
||||||
{
|
{
|
||||||
|
@ -2919,6 +2922,7 @@ class parser
|
||||||
}
|
}
|
||||||
|
|
||||||
// parse values
|
// parse values
|
||||||
|
std::string key;
|
||||||
BasicJsonType value;
|
BasicJsonType value;
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
|
@ -2927,7 +2931,7 @@ class parser
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const auto key = m_lexer.get_string();
|
key = m_lexer.get_string();
|
||||||
|
|
||||||
bool keep_tag = false;
|
bool keep_tag = false;
|
||||||
if (keep)
|
if (keep)
|
||||||
|
@ -2955,9 +2959,15 @@ class parser
|
||||||
value.m_value.destroy(value.m_type);
|
value.m_value.destroy(value.m_type);
|
||||||
value.m_type = value_t::discarded;
|
value.m_type = value_t::discarded;
|
||||||
parse_internal(keep, value);
|
parse_internal(keep, value);
|
||||||
|
|
||||||
|
if (JSON_UNLIKELY(errored))
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (keep and keep_tag and not value.is_discarded())
|
if (keep and keep_tag and not value.is_discarded())
|
||||||
{
|
{
|
||||||
result[key] = std::move(value);
|
result.m_value.object->operator[](std::move(key)) = std::move(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
// comma -> next value
|
// comma -> next value
|
||||||
|
@ -3015,9 +3025,15 @@ class parser
|
||||||
value.m_value.destroy(value.m_type);
|
value.m_value.destroy(value.m_type);
|
||||||
value.m_type = value_t::discarded;
|
value.m_type = value_t::discarded;
|
||||||
parse_internal(keep, value);
|
parse_internal(keep, value);
|
||||||
|
|
||||||
|
if (JSON_UNLIKELY(errored))
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (keep and not value.is_discarded())
|
if (keep and not value.is_discarded())
|
||||||
{
|
{
|
||||||
result.push_back(std::move(value));
|
result.m_value.array->push_back(std::move(value));
|
||||||
}
|
}
|
||||||
|
|
||||||
// comma -> next value
|
// comma -> next value
|
||||||
|
@ -4405,8 +4421,7 @@ class binary_reader
|
||||||
|
|
||||||
@param[in] adapter input adapter to read from
|
@param[in] adapter input adapter to read from
|
||||||
*/
|
*/
|
||||||
explicit binary_reader(input_adapter_t adapter)
|
explicit binary_reader(input_adapter_t adapter) : ia(adapter)
|
||||||
: ia(adapter), is_little_endian(little_endianess())
|
|
||||||
{
|
{
|
||||||
assert(ia);
|
assert(ia);
|
||||||
}
|
}
|
||||||
|
@ -5526,7 +5541,7 @@ class binary_reader
|
||||||
std::size_t chars_read = 0;
|
std::size_t chars_read = 0;
|
||||||
|
|
||||||
/// whether we can assume little endianess
|
/// whether we can assume little endianess
|
||||||
const bool is_little_endian = true;
|
const bool is_little_endian = little_endianess();
|
||||||
};
|
};
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
|
@ -5541,8 +5556,7 @@ class binary_writer
|
||||||
|
|
||||||
@param[in] adapter output adapter to write to
|
@param[in] adapter output adapter to write to
|
||||||
*/
|
*/
|
||||||
explicit binary_writer(output_adapter_t<CharType> adapter)
|
explicit binary_writer(output_adapter_t<CharType> adapter) : oa(adapter)
|
||||||
: is_little_endian(binary_reader<BasicJsonType>::little_endianess()), oa(adapter)
|
|
||||||
{
|
{
|
||||||
assert(oa);
|
assert(oa);
|
||||||
}
|
}
|
||||||
|
@ -6067,7 +6081,7 @@ class binary_writer
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// whether we can assume little endianess
|
/// whether we can assume little endianess
|
||||||
const bool is_little_endian = true;
|
const bool is_little_endian = binary_reader<BasicJsonType>::little_endianess();
|
||||||
|
|
||||||
/// the output
|
/// the output
|
||||||
output_adapter_t<CharType> oa = nullptr;
|
output_adapter_t<CharType> oa = nullptr;
|
||||||
|
|
Loading…
Reference in a new issue