This commit is contained in:
Niels 2015-02-11 09:41:23 +01:00
parent 8a4e127a57
commit 5d280143b7
3 changed files with 266 additions and 217 deletions

View file

@ -2415,15 +2415,27 @@ class basic_json
inline lexer() = default; inline lexer() = default;
#define YYMAXFILL 5 /*!
This function implements a scanner for JSON. It is specified using
regular expressions that try to follow RFC 7159 and ECMA-404 as closely
as possible. These regular expressions are then translated into a
deterministic finite automaton (DFA) by the tool RE2C. As a result, the
translated code for this function consists of a large block of code
with goto jumps.
@return the class of the next token read from the buffer
@todo Unicode support needs to be checked.
*/
inline token_type scan() inline token_type scan()
{ {
#define YYFILL(n) m_start = m_cursor;
{ {
char yych; char yych;
static const unsigned char yybm[] = { static const unsigned char yybm[] =
{
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
@ -2467,24 +2479,24 @@ class basic_json
{ {
if (yych <= 0x00) if (yych <= 0x00)
{ {
goto yy25; goto json_parser_25;
} }
if (yych >= '"') if (yych >= '"')
{ {
goto yy23; goto json_parser_23;
} }
} }
else else
{ {
if (yych <= '+') if (yych <= '+')
{ {
goto yy2; goto json_parser_2;
} }
if (yych <= ',') if (yych <= ',')
{ {
goto yy11; goto json_parser_11;
} }
goto yy18; goto json_parser_18;
} }
} }
else else
@ -2493,189 +2505,210 @@ class basic_json
{ {
if (yych <= '/') if (yych <= '/')
{ {
goto yy2; goto json_parser_2;
} }
if (yych <= '0') if (yych <= '0')
{ {
goto yy19; goto json_parser_19;
} }
goto yy21; goto json_parser_21;
} }
else else
{ {
if (yych <= ':') if (yych <= ':')
{ {
goto yy13; goto json_parser_13;
} }
if (yych >= '[') if (yych >= '[')
{ {
goto yy3; goto json_parser_3;
} }
} }
} }
} }
else { else
{
if (yych <= 'n') if (yych <= 'n')
{ {
if (yych <= 'e') if (yych <= 'e')
{ {
if (yych == ']') if (yych == ']')
{ {
goto yy5; goto json_parser_5;
} }
} }
else else
{ {
if (yych <= 'f') if (yych <= 'f')
{ {
goto yy17; goto json_parser_17;
} }
if (yych >= 'n') if (yych >= 'n')
{ {
goto yy15; goto json_parser_15;
} }
} }
} }
else { else
{
if (yych <= 'z') if (yych <= 'z')
{ {
if (yych == 't') if (yych == 't')
{ {
goto yy16; goto json_parser_16;
} }
} }
else { else
{
if (yych <= '{') if (yych <= '{')
{ {
goto yy7; goto json_parser_7;
} }
if (yych == '}') if (yych == '}')
{ {
goto yy9; goto json_parser_9;
} }
} }
} }
} }
yy2: json_parser_2:
m_cursor = m_marker; m_cursor = m_marker;
goto yy20; goto json_parser_20;
yy3: json_parser_3:
++m_cursor; ++m_cursor;
{ return token_type::begin_array; } {
yy5: return token_type::begin_array;
}
json_parser_5:
++m_cursor; ++m_cursor;
{ return token_type::end_array; } {
yy7: return token_type::end_array;
}
json_parser_7:
++m_cursor; ++m_cursor;
{ return token_type::begin_object; } {
yy9: return token_type::begin_object;
}
json_parser_9:
++m_cursor; ++m_cursor;
{ return token_type::end_object; } {
yy11: return token_type::end_object;
}
json_parser_11:
++m_cursor; ++m_cursor;
{ return token_type::value_separator; } {
yy13: return token_type::value_separator;
}
json_parser_13:
++m_cursor; ++m_cursor;
{ return token_type::name_separator; } {
yy15: return token_type::name_separator;
}
json_parser_15:
yych = *++m_cursor; yych = *++m_cursor;
if (yych == 'u') if (yych == 'u')
{ {
goto yy50; goto json_parser_50;
} }
goto yy2; goto json_parser_2;
yy16: json_parser_16:
yych = *++m_cursor; yych = *++m_cursor;
if (yych == 'r') if (yych == 'r')
{ {
goto yy46; goto json_parser_46;
} }
goto yy2; goto json_parser_2;
yy17: json_parser_17:
yych = *++m_cursor; yych = *++m_cursor;
if (yych == 'a') if (yych == 'a')
{ {
goto yy41; goto json_parser_41;
} }
goto yy2; goto json_parser_2;
yy18: json_parser_18:
yych = *++m_cursor; yych = *++m_cursor;
if (yych <= '/') if (yych <= '/')
{ {
goto yy2; goto json_parser_2;
} }
if (yych <= '0') if (yych <= '0')
{ {
goto yy19; goto json_parser_19;
} }
if (yych <= '9') if (yych <= '9')
{ {
goto yy21; goto json_parser_21;
} }
goto yy2; goto json_parser_2;
yy19: json_parser_19:
yych = *(m_marker = ++m_cursor); yych = *(m_marker = ++m_cursor);
if (yych <= 'D') if (yych <= 'D')
{ {
if (yych == '.') if (yych == '.')
{ {
goto yy34; goto json_parser_34;
} }
} }
else { else
{
if (yych <= 'E') if (yych <= 'E')
{ {
goto yy35; goto json_parser_35;
} }
if (yych == 'e') if (yych == 'e')
{ {
goto yy35; goto json_parser_35;
} }
} }
yy20: json_parser_20:
{ return token_type::value_number; } {
yy21: return token_type::value_number;
}
json_parser_21:
m_marker = ++m_cursor; m_marker = ++m_cursor;
yych = *m_cursor; yych = *m_cursor;
if (yybm[0 + yych] & 64) if (yybm[0 + yych] & 64)
{ {
goto yy21; goto json_parser_21;
} }
if (yych <= 'D') if (yych <= 'D')
{ {
if (yych == '.') if (yych == '.')
{ {
goto yy34; goto json_parser_34;
} }
goto yy20; goto json_parser_20;
} }
else { else
{
if (yych <= 'E') if (yych <= 'E')
{ {
goto yy35; goto json_parser_35;
} }
if (yych == 'e') if (yych == 'e')
{ {
goto yy35; goto json_parser_35;
} }
goto yy20; goto json_parser_20;
} }
yy23: json_parser_23:
++m_cursor; ++m_cursor;
yych = *m_cursor; yych = *m_cursor;
if (yybm[0 + yych] & 128) if (yybm[0 + yych] & 128)
{ {
goto yy23; goto json_parser_23;
} }
if (yych <= '"') if (yych <= '"')
{ {
goto yy28; goto json_parser_28;
} }
goto yy27; goto json_parser_27;
yy25: json_parser_25:
++m_cursor; ++m_cursor;
{ return token_type::end_of_input; } {
yy27: return token_type::end_of_input;
}
json_parser_27:
++m_cursor; ++m_cursor;
yych = *m_cursor; yych = *m_cursor;
if (yych <= 'e') if (yych <= 'e')
@ -2684,13 +2717,13 @@ yy27:
{ {
if (yych == '"') if (yych == '"')
{ {
goto yy23; goto json_parser_23;
} }
if (yych <= '.') if (yych <= '.')
{ {
goto yy2; goto json_parser_2;
} }
goto yy23; goto json_parser_23;
} }
else else
{ {
@ -2698,299 +2731,316 @@ yy27:
{ {
if (yych <= '[') if (yych <= '[')
{ {
goto yy2; goto json_parser_2;
} }
goto yy23; goto json_parser_23;
} }
else else
{ {
if (yych == 'b') if (yych == 'b')
{ {
goto yy23; goto json_parser_23;
} }
goto yy2; goto json_parser_2;
} }
} }
} }
else { else
{
if (yych <= 'q') if (yych <= 'q')
{ {
if (yych <= 'f') if (yych <= 'f')
{ {
goto yy23; goto json_parser_23;
} }
if (yych == 'n') if (yych == 'n')
{ {
goto yy23; goto json_parser_23;
} }
goto yy2; goto json_parser_2;
} }
else { else
{
if (yych <= 's') if (yych <= 's')
{ {
if (yych <= 'r') if (yych <= 'r')
{ {
goto yy23; goto json_parser_23;
} }
goto yy2; goto json_parser_2;
} }
else { else
{
if (yych <= 't') if (yych <= 't')
{ {
goto yy23; goto json_parser_23;
} }
if (yych <= 'u') if (yych <= 'u')
{ {
goto yy30; goto json_parser_30;
} }
goto yy2; goto json_parser_2;
} }
} }
} }
yy28: json_parser_28:
++m_cursor; ++m_cursor;
{ return token_type::value_string; } {
yy30: return token_type::value_string;
}
json_parser_30:
++m_cursor; ++m_cursor;
yych = *m_cursor; yych = *m_cursor;
if (yych <= '@') if (yych <= '@')
{ {
if (yych <= '/') if (yych <= '/')
{ {
goto yy2; goto json_parser_2;
} }
if (yych >= ':') if (yych >= ':')
{ {
goto yy2; goto json_parser_2;
} }
} }
else { else
{
if (yych <= 'F') if (yych <= 'F')
{ {
goto yy31; goto json_parser_31;
} }
if (yych <= '`') if (yych <= '`')
{ {
goto yy2; goto json_parser_2;
} }
if (yych >= 'g') if (yych >= 'g')
{ {
goto yy2; goto json_parser_2;
} }
} }
yy31: json_parser_31:
++m_cursor; ++m_cursor;
yych = *m_cursor; yych = *m_cursor;
if (yych <= '@') if (yych <= '@')
{ {
if (yych <= '/') if (yych <= '/')
{ {
goto yy2; goto json_parser_2;
} }
if (yych >= ':') if (yych >= ':')
{ {
goto yy2; goto json_parser_2;
} }
} }
else { else
{
if (yych <= 'F') if (yych <= 'F')
{ {
goto yy32; goto json_parser_32;
} }
if (yych <= '`') if (yych <= '`')
{ {
goto yy2; goto json_parser_2;
} }
if (yych >= 'g') if (yych >= 'g')
{ {
goto yy2; goto json_parser_2;
} }
} }
yy32: json_parser_32:
++m_cursor; ++m_cursor;
yych = *m_cursor; yych = *m_cursor;
if (yych <= '@') if (yych <= '@')
{ {
if (yych <= '/') if (yych <= '/')
{ {
goto yy2; goto json_parser_2;
} }
if (yych >= ':') if (yych >= ':')
{ {
goto yy2; goto json_parser_2;
} }
} }
else { else
{
if (yych <= 'F') if (yych <= 'F')
{ {
goto yy33; goto json_parser_33;
} }
if (yych <= '`') if (yych <= '`')
{ {
goto yy2; goto json_parser_2;
} }
if (yych >= 'g') if (yych >= 'g')
{ {
goto yy2; goto json_parser_2;
} }
} }
yy33: json_parser_33:
++m_cursor; ++m_cursor;
yych = *m_cursor; yych = *m_cursor;
if (yych <= '@') if (yych <= '@')
{ {
if (yych <= '/') if (yych <= '/')
{ {
goto yy2; goto json_parser_2;
} }
if (yych <= '9') if (yych <= '9')
{ {
goto yy23; goto json_parser_23;
} }
goto yy2; goto json_parser_2;
} }
else { else
{
if (yych <= 'F') if (yych <= 'F')
{ {
goto yy23; goto json_parser_23;
} }
if (yych <= '`') if (yych <= '`')
{ {
goto yy2; goto json_parser_2;
} }
if (yych <= 'f') if (yych <= 'f')
{ {
goto yy23; goto json_parser_23;
} }
goto yy2; goto json_parser_2;
} }
yy34: json_parser_34:
yych = *++m_cursor; yych = *++m_cursor;
if (yych <= '/') if (yych <= '/')
{ {
goto yy2; goto json_parser_2;
} }
if (yych <= '9') if (yych <= '9')
{ {
goto yy39; goto json_parser_39;
} }
goto yy2; goto json_parser_2;
yy35: json_parser_35:
yych = *++m_cursor; yych = *++m_cursor;
if (yych <= ',') if (yych <= ',')
{ {
if (yych != '+') if (yych != '+')
{ {
goto yy2; goto json_parser_2;
} }
} }
else { else
{
if (yych <= '-') if (yych <= '-')
{ {
goto yy36; goto json_parser_36;
} }
if (yych <= '/') if (yych <= '/')
{ {
goto yy2; goto json_parser_2;
} }
if (yych <= '9') if (yych <= '9')
{ {
goto yy37; goto json_parser_37;
} }
goto yy2; goto json_parser_2;
} }
yy36: json_parser_36:
yych = *++m_cursor; yych = *++m_cursor;
if (yych <= '/') if (yych <= '/')
{ {
goto yy2; goto json_parser_2;
} }
if (yych >= ':') if (yych >= ':')
{ {
goto yy2; goto json_parser_2;
} }
yy37: json_parser_37:
++m_cursor; ++m_cursor;
yych = *m_cursor; yych = *m_cursor;
if (yych <= '/') if (yych <= '/')
{ {
goto yy20; goto json_parser_20;
} }
if (yych <= '9') if (yych <= '9')
{ {
goto yy37; goto json_parser_37;
} }
goto yy20; goto json_parser_20;
yy39: json_parser_39:
m_marker = ++m_cursor; m_marker = ++m_cursor;
yych = *m_cursor; yych = *m_cursor;
if (yych <= 'D') if (yych <= 'D')
{ {
if (yych <= '/') if (yych <= '/')
{ {
goto yy20; goto json_parser_20;
} }
if (yych <= '9') if (yych <= '9')
{ {
goto yy39; goto json_parser_39;
} }
goto yy20; goto json_parser_20;
} }
else { else
{
if (yych <= 'E') if (yych <= 'E')
{ {
goto yy35; goto json_parser_35;
} }
if (yych == 'e') if (yych == 'e')
{ {
goto yy35; goto json_parser_35;
} }
goto yy20; goto json_parser_20;
} }
yy41: json_parser_41:
yych = *++m_cursor; yych = *++m_cursor;
if (yych != 'l') if (yych != 'l')
{ {
goto yy2; goto json_parser_2;
} }
yych = *++m_cursor; yych = *++m_cursor;
if (yych != 's') if (yych != 's')
{ {
goto yy2; goto json_parser_2;
} }
yych = *++m_cursor; yych = *++m_cursor;
if (yych != 'e') if (yych != 'e')
{ {
goto yy2; goto json_parser_2;
} }
++m_cursor; ++m_cursor;
{ return token_type::literal_false; } {
yy46: return token_type::literal_false;
}
json_parser_46:
yych = *++m_cursor; yych = *++m_cursor;
if (yych != 'u') if (yych != 'u')
{ {
goto yy2; goto json_parser_2;
} }
yych = *++m_cursor; yych = *++m_cursor;
if (yych != 'e') if (yych != 'e')
{ {
goto yy2; goto json_parser_2;
} }
++m_cursor; ++m_cursor;
{ return token_type::literal_true; } {
yy50: return token_type::literal_true;
}
json_parser_50:
yych = *++m_cursor; yych = *++m_cursor;
if (yych != 'l') if (yych != 'l')
{ {
goto yy2; goto json_parser_2;
} }
yych = *++m_cursor; yych = *++m_cursor;
if (yych != 'l') if (yych != 'l')
{ {
goto yy2; goto json_parser_2;
} }
++m_cursor; ++m_cursor;
{ return token_type::literal_null; } {
return token_type::literal_null;
}
} }
} }
@ -3001,11 +3051,11 @@ yy50:
} }
/*! /*!
The pointer m_begin points to the opening quote of the string, and The pointer m_start points to the opening quote of the string, and
m_cursor past the closing quote of the string. We create a std::string from m_cursor past the closing quote of the string. We create a std::string
the character after the opening quotes (m_begin+1) until the character from the character after the opening quotes (m_start+1) until the
before the closing quotes (hence subtracting 2 characters from the pointer character before the closing quotes (hence subtracting 2 characters
difference of the two pointers). from the pointer difference of the two pointers).
@return string value of current token without opening and closing quotes @return string value of current token without opening and closing quotes
@ -3018,14 +3068,13 @@ yy50:
inline number_float_t get_number() const inline number_float_t get_number() const
{ {
// The pointer m_begin points to the beginning of the // The pointer m_start points to the beginning of the parsed
// parsed number. We pass this pointer to std::strtod which // number. We pass this pointer to std::strtod which sets endptr to
// sets endptr to the first character past the converted // the first character past the converted number. If this pointer is
// number. If this pointer is not the same as m_cursor, // not the same as m_cursor, then either more or less characters
// then either more or less characters have been used // have been used during the comparison. This can happen for inputs
// during the comparison. This can happen for inputs like // like "01" which will be treated like number 0 followed by number
// "01" which will be treated like number 0 followed by // 1.
// number 1.
// conversion // conversion
char* endptr; char* endptr;
@ -3044,13 +3093,16 @@ yy50:
} }
private: private:
/// the buffer
const char* m_content = nullptr; const char* m_content = nullptr;
/// pointer to the beginning of the current symbol
const char* m_start = nullptr; const char* m_start = nullptr;
/// pointer to the current symbol
const char* m_cursor = nullptr; const char* m_cursor = nullptr;
/// pointer to the end of the buffer
const char* m_limit = nullptr; const char* m_limit = nullptr;
/// pointer for backtracking information
const char* m_marker = nullptr; const char* m_marker = nullptr;
const char* m_ctxmarker = nullptr;
}; };
class parser class parser

View file

@ -2415,19 +2415,30 @@ class basic_json
inline lexer() = default; inline lexer() = default;
/*!max:re2c */ /*!
This function implements a scanner for JSON. It is specified using
regular expressions that try to follow RFC 7159 and ECMA-404 as closely
as possible. These regular expressions are then translated into a
deterministic finite automaton (DFA) by the tool RE2C. As a result, the
translated code for this function consists of a large block of code
with goto jumps.
@return the class of the next token read from the buffer
@todo Unicode support needs to be checked.
*/
inline token_type scan() inline token_type scan()
{ {
#define YYFILL(n) m_start = m_cursor;
/*!re2c /*!re2c
re2c:define:YYCURSOR = m_cursor; re2c:define:YYCURSOR = m_cursor;
re2c:define:YYLIMIT = m_limit; re2c:define:YYLIMIT = m_limit;
re2c:define:YYCTYPE = char; re2c:define:YYCTYPE = char;
re2c:define:YYCTXMARKER = m_ctxmarker;
re2c:define:YYMARKER = m_marker; re2c:define:YYMARKER = m_marker;
re2c:indent:top = 1; re2c:indent:top = 1;
re2c:yyfill:enable = 0; re2c:yyfill:enable = 0;
re2c:labelprefix = "json_parser_";
// structural characters // structural characters
"[" { return token_type::begin_array; } "[" { return token_type::begin_array; }
@ -2466,7 +2477,7 @@ class basic_json
string { return token_type::value_string; } string { return token_type::value_string; }
// end of file // end of file
'\000' { return token_type::end_of_input; } '\000' { return token_type::end_of_input; }
*/ */
} }
@ -2476,11 +2487,11 @@ class basic_json
} }
/*! /*!
The pointer m_begin points to the opening quote of the string, and The pointer m_start points to the opening quote of the string, and
m_cursor past the closing quote of the string. We create a std::string from m_cursor past the closing quote of the string. We create a std::string
the character after the opening quotes (m_begin+1) until the character from the character after the opening quotes (m_start+1) until the
before the closing quotes (hence subtracting 2 characters from the pointer character before the closing quotes (hence subtracting 2 characters
difference of the two pointers). from the pointer difference of the two pointers).
@return string value of current token without opening and closing quotes @return string value of current token without opening and closing quotes
@ -2493,14 +2504,13 @@ class basic_json
inline number_float_t get_number() const inline number_float_t get_number() const
{ {
// The pointer m_begin points to the beginning of the // The pointer m_start points to the beginning of the parsed
// parsed number. We pass this pointer to std::strtod which // number. We pass this pointer to std::strtod which sets endptr to
// sets endptr to the first character past the converted // the first character past the converted number. If this pointer is
// number. If this pointer is not the same as m_cursor, // not the same as m_cursor, then either more or less characters
// then either more or less characters have been used // have been used during the comparison. This can happen for inputs
// during the comparison. This can happen for inputs like // like "01" which will be treated like number 0 followed by number
// "01" which will be treated like number 0 followed by // 1.
// number 1.
// conversion // conversion
char* endptr; char* endptr;
@ -2519,13 +2529,16 @@ class basic_json
} }
private: private:
/// the buffer
const char* m_content = nullptr; const char* m_content = nullptr;
/// pointer to the beginning of the current symbol
const char* m_start = nullptr; const char* m_start = nullptr;
/// pointer to the current symbol
const char* m_cursor = nullptr; const char* m_cursor = nullptr;
/// pointer to the end of the buffer
const char* m_limit = nullptr; const char* m_limit = nullptr;
/// pointer for backtracking information
const char* m_marker = nullptr; const char* m_marker = nullptr;
const char* m_ctxmarker = nullptr;
}; };
class parser class parser

View file

@ -3892,43 +3892,27 @@ TEST_CASE("deserialization")
{ {
SECTION("string") SECTION("string")
{ {
// auto s = "[\"foo\",1,2,3,false,{\"one\":1}]"; auto s = "[\"foo\",1,2,3,false,{\"one\":1}]";
// json j = json::parse(s);
// CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
auto s = "null";
json j = json::parse(s); json j = json::parse(s);
CHECK(j == json()); CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
} }
SECTION("operator<<") SECTION("operator<<")
{ {
// std::stringstream ss;
// ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
// json j;
// j << ss;
// CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
std::stringstream ss; std::stringstream ss;
ss << "null"; ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
json j; json j;
j << ss; j << ss;
CHECK(j == json()); CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
} }
SECTION("operator>>") SECTION("operator>>")
{ {
// std::stringstream ss;
// ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
// json j;
// ss >> j;
// CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
std::stringstream ss; std::stringstream ss;
ss << "null"; ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
json j; json j;
ss >> j; ss >> j;
CHECK(j == json()); CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
} }
} }