Further performance improvements, and corrections in get_token_string
o An EOF (a negative value, typically -1) must never be allowed in token_string, as it would be converted to 255 -- a legitimate value. o Comparing against a specific eof() (typically -1) is more costly than testing whether the value is negative or non-negative. Since EOF is the only negative value allowed, we can use the simpler test. o Removed unnecessary test for token_string size, as it is already tested in the method, and an empty token_string must never occur in correct code; used an assert instead.
This commit is contained in:
parent
8665e25942
commit
546e148b24
2 changed files with 28 additions and 19 deletions
45
src/json.hpp
45
src/json.hpp
|
@ -1397,8 +1397,8 @@ constexpr T static_const<T>::value;
|
||||||
/// abstract input adapter interface
|
/// abstract input adapter interface
|
||||||
struct input_adapter_protocol
|
struct input_adapter_protocol
|
||||||
{
|
{
|
||||||
virtual int get_character() = 0;
|
virtual int get_character() = 0; // returns characters in range [0,255], or eof() (a -'ve value)
|
||||||
virtual void unget_character() = 0;
|
virtual void unget_character() = 0; // restore the last non-eof() character to input
|
||||||
virtual ~input_adapter_protocol() = default;
|
virtual ~input_adapter_protocol() = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1449,7 +1449,7 @@ class input_stream_adapter : public input_adapter_protocol
|
||||||
int get_character() override
|
int get_character() override
|
||||||
{
|
{
|
||||||
int c = is.rdbuf()->sbumpc(); // Avoided for performance: int c = is.get();
|
int c = is.rdbuf()->sbumpc(); // Avoided for performance: int c = is.get();
|
||||||
return c == std::char_traits<char>::eof() ? c : ( c & 0xFF );
|
return c < 0 ? c : ( c & 0xFF ); // faster than == std::char_traits<char>::eof()
|
||||||
}
|
}
|
||||||
|
|
||||||
void unget_character() override
|
void unget_character() override
|
||||||
|
@ -2652,12 +2652,24 @@ scan_number_done:
|
||||||
token_string.push_back(static_cast<char>(current));
|
token_string.push_back(static_cast<char>(current));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// get a character from the input
|
/*
|
||||||
|
@brief get next character from the input
|
||||||
|
|
||||||
|
This function provides the interface to the used input adapter. It does
|
||||||
|
not throw in case the input reached EOF, but returns a -'ve valued
|
||||||
|
`std::char_traits<char>::eof()` in that case. Stores the scanned characters
|
||||||
|
for use in error messages.
|
||||||
|
|
||||||
|
@return character read from the input
|
||||||
|
*/
|
||||||
int get()
|
int get()
|
||||||
{
|
{
|
||||||
++chars_read;
|
++chars_read;
|
||||||
current = ia->get_character();
|
current = ia->get_character();
|
||||||
token_string.push_back(static_cast<char>(current));
|
if (JSON_LIKELY(current >= 0)) // faster than: != std::char_traits<char>::eof()))
|
||||||
|
{
|
||||||
|
token_string.push_back(static_cast<char>(current));
|
||||||
|
}
|
||||||
return current;
|
return current;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2665,12 +2677,12 @@ scan_number_done:
|
||||||
void unget()
|
void unget()
|
||||||
{
|
{
|
||||||
--chars_read;
|
--chars_read;
|
||||||
if (JSON_LIKELY(current != std::char_traits<char>::eof()))
|
if (JSON_LIKELY(current >= 0)) // faster than: != std::char_traits<char>::eof()))
|
||||||
{
|
{
|
||||||
ia->unget_character();
|
ia->unget_character();
|
||||||
}
|
assert(token_string.size() != 0);
|
||||||
if (! token_string.empty())
|
|
||||||
token_string.pop_back();
|
token_string.pop_back();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// add a character to yytext
|
/// add a character to yytext
|
||||||
|
@ -2718,19 +2730,16 @@ scan_number_done:
|
||||||
return chars_read;
|
return chars_read;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// return the last read token (for errors only)
|
/// return the last read token (for errors only). Will never contain EOF
|
||||||
|
/// (a -'ve value), because 255 may legitimately occur. May contain NUL, which
|
||||||
|
/// should be escaped.
|
||||||
std::string get_token_string() const
|
std::string get_token_string() const
|
||||||
{
|
{
|
||||||
// escape control characters
|
// escape control characters
|
||||||
std::string result;
|
std::string result;
|
||||||
for (auto c : token_string)
|
for (auto c : token_string)
|
||||||
{
|
{
|
||||||
if (c == '\0' or c == std::char_traits<char>::eof())
|
if ('\x00' <= c and c <= '\x1f')
|
||||||
{
|
|
||||||
// ignore EOF
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
else if ('\x00' <= c and c <= '\x1f')
|
|
||||||
{
|
{
|
||||||
// escape control characters
|
// escape control characters
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
|
@ -5144,7 +5153,7 @@ class binary_reader
|
||||||
@brief get next character from the input
|
@brief get next character from the input
|
||||||
|
|
||||||
This function provides the interface to the used input adapter. It does
|
This function provides the interface to the used input adapter. It does
|
||||||
not throw in case the input reached EOF, but returns
|
not throw in case the input reached EOF, but returns a -'ve valued
|
||||||
`std::char_traits<char>::eof()` in that case.
|
`std::char_traits<char>::eof()` in that case.
|
||||||
|
|
||||||
@return character read from the input
|
@return character read from the input
|
||||||
|
@ -5448,14 +5457,14 @@ class binary_reader
|
||||||
{
|
{
|
||||||
if (expect_eof)
|
if (expect_eof)
|
||||||
{
|
{
|
||||||
if (JSON_UNLIKELY(current != std::char_traits<char>::eof()))
|
if (JSON_UNLIKELY(current >= 0 )) // faster than: != std::char_traits<char>::eof()))
|
||||||
{
|
{
|
||||||
JSON_THROW(parse_error::create(110, chars_read, "expected end of input"));
|
JSON_THROW(parse_error::create(110, chars_read, "expected end of input"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (JSON_UNLIKELY(current == std::char_traits<char>::eof()))
|
if (JSON_UNLIKELY(current < 0)) // faster than: == std::char_traits<char>::eof()))
|
||||||
{
|
{
|
||||||
JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input"));
|
JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input"));
|
||||||
}
|
}
|
||||||
|
|
|
@ -215,7 +215,7 @@ TEST_CASE("parser class")
|
||||||
std::string s = "\"1\"";
|
std::string s = "\"1\"";
|
||||||
s[1] = '\0';
|
s[1] = '\0';
|
||||||
CHECK_THROWS_AS(json::parse(s.begin(), s.end()), json::parse_error&);
|
CHECK_THROWS_AS(json::parse(s.begin(), s.end()), json::parse_error&);
|
||||||
CHECK_THROWS_WITH(json::parse(s.begin(), s.end()), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character must be escaped; last read: '\"'");
|
CHECK_THROWS_WITH(json::parse(s.begin(), s.end()), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character must be escaped; last read: '\"<U+0000>'");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue