Improve performance by constructing yytext as a std::string

o Return its contents when necessary.  In many cases, this avoids
  construction of multiple copies of the yytext token.  Exceeds
  performance of current develop branch.
This commit is contained in:
Perry Kundert 2017-10-04 08:40:32 -07:00
parent 7c523338c5
commit 97a388802d

View file

@ -1795,9 +1795,9 @@ class lexer
@brief scan a string literal @brief scan a string literal
This function scans a string according to Sect. 7 of RFC 7159. While This function scans a string according to Sect. 7 of RFC 7159. While
scanning, bytes are escaped and copied into buffer yytext. When the scanning, bytes are escaped and copied into buffer yytext. When the function
function returns successfully, yytext is null-terminated and yylen returns successfully, yytext is *not* null-terminated (as it may contain \0
contains the number of bytes in the string. bytes), and yytext.size() is the number of bytes in the string.
@return token_type::value_string if string could be successfully scanned, @return token_type::value_string if string could be successfully scanned,
token_type::parse_error otherwise token_type::parse_error otherwise
@ -2582,7 +2582,7 @@ scan_number_done:
const auto x = std::strtoull(yytext.data(), &endptr, 10); const auto x = std::strtoull(yytext.data(), &endptr, 10);
// we checked the number format before // we checked the number format before
assert(endptr == yytext.data() + yylen); assert(endptr == yytext.data() + yytext.size());
if (errno == 0) if (errno == 0)
{ {
@ -2598,7 +2598,7 @@ scan_number_done:
const auto x = std::strtoll(yytext.data(), &endptr, 10); const auto x = std::strtoll(yytext.data(), &endptr, 10);
// we checked the number format before // we checked the number format before
assert(endptr == yytext.data() + yylen); assert(endptr == yytext.data() + yytext.size());
if (errno == 0) if (errno == 0)
{ {
@ -2615,7 +2615,7 @@ scan_number_done:
strtof(value_float, yytext.data(), &endptr); strtof(value_float, yytext.data(), &endptr);
// we checked the number format before // we checked the number format before
assert(endptr == yytext.data() + yylen); assert(endptr == yytext.data() + yytext.size());
return token_type::value_float; return token_type::value_float;
} }
@ -2647,7 +2647,7 @@ scan_number_done:
/// reset yytext; current character is beginning of token /// reset yytext; current character is beginning of token
void reset() noexcept void reset() noexcept
{ {
yylen = 0; yytext.clear();
token_string.clear(); token_string.clear();
token_string.push_back(static_cast<char>(current)); token_string.push_back(static_cast<char>(current));
} }
@ -2676,14 +2676,7 @@ scan_number_done:
/// add a character to yytext /// add a character to yytext
void add(int c) void add(int c)
{ {
// resize yytext if necessary; this condition is deemed unlikely, yytext.push_back(static_cast<char>(c));
// because we start with a 1024-byte buffer
if (JSON_UNLIKELY((yylen + 1 > yytext.capacity())))
{
yytext.resize(2 * yytext.capacity(), '\0');
}
assert(yylen < yytext.size());
yytext[yylen++] = static_cast<char>(c);
} }
public: public:
@ -2712,9 +2705,7 @@ scan_number_done:
/// return string value /// return string value
const std::string get_string() const std::string get_string()
{ {
// yytext cannot be returned as char*, because it may contain a null return std::move( yytext );
// byte (parsed as "\u0000")
return std::string(yytext.data(), yylen);
} }
///////////////////// /////////////////////
@ -2844,9 +2835,7 @@ scan_number_done:
std::vector<char> token_string = std::vector<char>(); std::vector<char> token_string = std::vector<char>();
/// buffer for variable-length tokens (numbers, strings) /// buffer for variable-length tokens (numbers, strings)
std::vector<char> yytext = std::vector<char>(1024, '\0'); std::string yytext = "";
/// current index in yytext
std::size_t yylen = 0;
/// a description of occurred lexer errors /// a description of occurred lexer errors
const char* error_message = ""; const char* error_message = "";