From 546e148b24c8b58a6e3bed8aadb8f081f7059f98 Mon Sep 17 00:00:00 2001 From: Perry Kundert Date: Wed, 4 Oct 2017 11:31:10 -0700 Subject: [PATCH] Further performance improvements, and corrections in get_token_string o A negative-valued EOF (typically -1) must never be allowed in token_string, as it would be converted to 255 -- a legitimate value. o Comparing against a specific eof() (typically -1) is more costly than testing the sign. Since EOF is the only negative value allowed, we can use the simpler test. o Removed the unnecessary test for token_string size, as it is already tested in the method, and an empty token_string must never occur at that point in correct code; used an assert instead. --- src/json.hpp | 45 ++++++++++++++++++++-------------- test/src/unit-class_parser.cpp | 2 +- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 2d95f18d..ed5f1c1d 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -1397,8 +1397,8 @@ constexpr T static_const::value; /// abstract input adapter interface struct input_adapter_protocol { - virtual int get_character() = 0; - virtual void unget_character() = 0; + virtual int get_character() = 0; // returns characters in range [0,255], or eof() (a -'ve value) + virtual void unget_character() = 0; // restore the last non-eof() character to input virtual ~input_adapter_protocol() = default; }; @@ -1449,7 +1449,7 @@ class input_stream_adapter : public input_adapter_protocol int get_character() override { int c = is.rdbuf()->sbumpc(); // Avoided for performance: int c = is.get(); - return c == std::char_traits::eof() ? c : ( c & 0xFF ); + return c < 0 ? c : ( c & 0xFF ); // faster than == std::char_traits::eof() } void unget_character() override @@ -2652,12 +2652,24 @@ scan_number_done: token_string.push_back(static_cast(current)); } - /// get a character from the input + /* + @brief get next character from the input + + This function provides the interface to the used input adapter.
It does + not throw in case the input reached EOF, but returns a -'ve valued + `std::char_traits::eof()` in that case. Stores the scanned characters + for use in error messages. + + @return character read from the input + */ int get() { ++chars_read; current = ia->get_character(); - token_string.push_back(static_cast(current)); + if (JSON_LIKELY(current >= 0)) // faster than: != std::char_traits::eof())) + { + token_string.push_back(static_cast(current)); + } return current; } @@ -2665,12 +2677,12 @@ scan_number_done: void unget() { --chars_read; - if (JSON_LIKELY(current != std::char_traits::eof())) + if (JSON_LIKELY(current >= 0)) // faster than: != std::char_traits::eof())) { ia->unget_character(); - } - if (! token_string.empty()) + assert(token_string.size() != 0); token_string.pop_back(); + } } /// add a character to yytext @@ -2718,19 +2730,16 @@ scan_number_done: return chars_read; } - /// return the last read token (for errors only) + /// return the last read token (for errors only). Will never contain EOF + /// (a -'ve value), because 255 may legitimately occur. May contain NUL, which + /// should be escaped. std::string get_token_string() const { // escape control characters std::string result; for (auto c : token_string) { - if (c == '\0' or c == std::char_traits::eof()) - { - // ignore EOF - continue; - } - else if ('\x00' <= c and c <= '\x1f') + if ('\x00' <= c and c <= '\x1f') { // escape control characters std::stringstream ss; @@ -5144,7 +5153,7 @@ class binary_reader @brief get next character from the input This function provides the interface to the used input adapter. It does - not throw in case the input reached EOF, but returns + not throw in case the input reached EOF, but returns a -'ve valued `std::char_traits::eof()` in that case. 
@return character read from the input @@ -5448,14 +5457,14 @@ class binary_reader { if (expect_eof) { - if (JSON_UNLIKELY(current != std::char_traits::eof())) + if (JSON_UNLIKELY(current >= 0 )) // faster than: != std::char_traits::eof())) { JSON_THROW(parse_error::create(110, chars_read, "expected end of input")); } } else { - if (JSON_UNLIKELY(current == std::char_traits::eof())) + if (JSON_UNLIKELY(current < 0)) // faster than: == std::char_traits::eof())) { JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); } diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index 08d4d6ef..9afa7d26 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -215,7 +215,7 @@ TEST_CASE("parser class") std::string s = "\"1\""; s[1] = '\0'; CHECK_THROWS_AS(json::parse(s.begin(), s.end()), json::parse_error&); - CHECK_THROWS_WITH(json::parse(s.begin(), s.end()), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character must be escaped; last read: '\"'"); + CHECK_THROWS_WITH(json::parse(s.begin(), s.end()), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character must be escaped; last read: '\"'"); } }