Restore istream performance #764

o Use std::streambuf I/O instead of std::istream; does not maintain
  (unused) istream flags.
o Further simplify get/unget handling.
o Restore original handling of NUL in input stream; ignored during
  token_string escaping.
This commit is contained in:
Perry Kundert 2017-10-03 14:38:38 -07:00
parent 12efeadc2e
commit 14ca1f6f09

View file

@ -1410,48 +1410,51 @@ using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
class input_stream_adapter : public input_adapter_protocol class input_stream_adapter : public input_adapter_protocol
{ {
public: public:
~input_stream_adapter() override
{
// clear stream flags; we use underlying streambuf I/O, do not maintain ifstream flags
is.clear();
}
explicit input_stream_adapter(std::istream& i) explicit input_stream_adapter(std::istream& i)
: is(i) : is(i)
{ {
// Ignore Byte Order Mark at start of input // Ignore Byte Order Mark at start of input
int c; int c;
if (( c = get_character() ) == 0xEF ) if (( c = get_character() ) == 0xEF )
{ {
if (( c = get_character() ) == 0xBB ) if (( c = get_character() ) == 0xBB )
{ {
if (( c = get_character() ) == 0xBF ) if (( c = get_character() ) == 0xBF )
{ {
return; // Ignore BOM return; // Ignore BOM
} }
else if ( c != std::char_traits<char>::eof() ) else if ( c != std::char_traits<char>::eof() )
{ {
is.unget(); is.unget();
} }
is.putback( '\xBB' ); is.putback( '\xBB' );
} }
else if ( c != std::char_traits<char>::eof() ) else if ( c != std::char_traits<char>::eof() )
{ {
is.unget(); is.unget();
} }
is.putback( '\xEF' ); is.putback( '\xEF' );
} }
else if ( c != std::char_traits<char>::eof() ) else if ( c != std::char_traits<char>::eof() )
{ {
is.unget(); // Not BOM. Process as usual. is.unget(); // Not BOM. Process as usual.
} }
} }
~input_stream_adapter() override {}
int get_character() override int get_character() override
{ {
int c = is.get(); int c = is.rdbuf()->sbumpc(); // Avoided for performance: int c = is.get();
return c == std::char_traits<char>::eof() ? c : ( c & 0xFF ); return c == std::char_traits<char>::eof() ? c : ( c & 0xFF );
} }
void unget_character() override void unget_character() override
{ {
is.unget(); is.rdbuf()->sungetc(); // Avoided for performance: is.unget();
} }
private: private:
@ -1489,10 +1492,10 @@ class input_buffer_adapter : public input_adapter_protocol
void unget_character() noexcept override void unget_character() noexcept override
{ {
if (JSON_LIKELY(cursor > 0)) if (JSON_LIKELY(cursor > start))
{ {
--cursor; --cursor;
} }
} }
private: private:
@ -2571,7 +2574,7 @@ scan_number_any2:
scan_number_done: scan_number_done:
// unget the character after the number (we only read it to know that // unget the character after the number (we only read it to know that
// we are done scanning a number) // we are done scanning a number)
unget(); unget();
// terminate token // terminate token
add('\0'); add('\0');
@ -2652,29 +2655,31 @@ scan_number_done:
void reset() noexcept void reset() noexcept
{ {
yylen = 0; yylen = 0;
start_pos = chars_read - 1; token_string.clear();
token_string = static_cast<char>( current ); token_string.push_back(static_cast<char>(current));
} }
/// get a character from the input /// get a character from the input
int get() int get()
{ {
++chars_read; ++chars_read;
int c = current = ia->get_character();
int c = current = ia->get_character(); token_string.push_back(static_cast<char>(c));
token_string += static_cast<char>( c ); return c;
return c;
} }
/// unget current character (return it again on next get) /// unget current character (return it again on next get)
void unget() void unget()
{ {
--chars_read; --chars_read;
if (JSON_LIKELY(current != std::char_traits<char>::eof()))
if (token_string.size() > 0) {
token_string.resize( token_string.size() - 1 ); ia->unget_character();
}
if (! token_string.empty())
token_string.pop_back();
} }
/// add a character to yytext /// add a character to yytext
void add(int c) void add(int c)
{ {
@ -2736,10 +2741,12 @@ scan_number_done:
std::string result; std::string result;
for (auto c : token_string) for (auto c : token_string)
{ {
if ( c == std::char_traits<char>::eof() ) { if (c == '\0' or c == std::char_traits<char>::eof())
continue; {
} // ignore EOF
else if ('\x00' <= c and c <= '\x1f') continue;
}
else if ('\x00' <= c and c <= '\x1f')
{ {
// escape control characters // escape control characters
std::stringstream ss; std::stringstream ss;
@ -2840,10 +2847,8 @@ scan_number_done:
/// the number of characters read /// the number of characters read
std::size_t chars_read = 0; std::size_t chars_read = 0;
/// the start position of the current token
std::size_t start_pos = 0;
/// raw input token string (for error messages) /// raw input token string (for error messages)
std::string token_string = ""; std::vector<char> token_string = std::vector<char>();
/// buffer for variable-length tokens (numbers, strings) /// buffer for variable-length tokens (numbers, strings)
std::vector<char> yytext = std::vector<char>(1024, '\0'); std::vector<char> yytext = std::vector<char>(1024, '\0');