🔨 improved diagnostic output

This commit is contained in:
Niels Lohmann 2017-04-15 10:40:10 +02:00
parent 717106eced
commit db9bf953f3
No known key found for this signature in database
GPG key ID: 7F3CEA63AE251B69
2 changed files with 39 additions and 28 deletions

View file

@ -11127,6 +11127,13 @@ class basic_json
return codepoint; return codepoint;
} }
/// @brief format a code point as "U+XXXX" for use in diagnostic messages
/// @param[in] codepoint  the value to format
/// @return the string "U+" followed by the value in uppercase hexadecimal,
///         zero-padded to at least four digits (wider values are not
///         truncated)
static std::string codepoint_to_string(int codepoint)
{
    std::stringstream ss;
    // a freshly constructed stream copies the global locale; a locale whose
    // numpunct facet defines digit grouping would insert separators into the
    // hex digits (e.g. "U+D,80C"), so pin the stream to the classic "C"
    // locale to keep the output format fixed
    ss.imbue(std::locale::classic());
    ss << "U+" << std::setw(4) << std::uppercase << std::setfill('0') << std::hex << codepoint;
    return ss.str();
}
token_type scan_string() token_type scan_string()
{ {
// reset yytext (ignore opening quote) // reset yytext (ignore opening quote)
@ -11237,13 +11244,13 @@ class basic_json
} }
else else
{ {
error_message = "invalid string: invalid low surrogate"; error_message = "invalid string: surrogate " + codepoint_to_string(codepoint1) + " must be followed by U+DC00..U+DFFF instead of " + codepoint_to_string(codepoint2);
return token_type::parse_error; return token_type::parse_error;
} }
} }
else else
{ {
error_message = "invalid string: missing low surrogate"; error_message = "invalid string: surrogate " + codepoint_to_string(codepoint1) + " must be followed by U+DC00..U+DFFF";
return token_type::parse_error; return token_type::parse_error;
} }
} }
@ -11251,7 +11258,7 @@ class basic_json
{ {
if (JSON_UNLIKELY(0xDC00 <= codepoint1 and codepoint1 <= 0xDFFF)) if (JSON_UNLIKELY(0xDC00 <= codepoint1 and codepoint1 <= 0xDFFF))
{ {
error_message = "invalid string: missing high surrogate"; error_message = "invalid string: surrogate " + codepoint_to_string(codepoint1) + " must follow U+D800..U+DBFF";
return token_type::parse_error; return token_type::parse_error;
} }
@ -11336,7 +11343,7 @@ class basic_json
case 0x1e: case 0x1e:
case 0x1f: case 0x1f:
{ {
error_message = "invalid string: control characters (U+0000 through U+001f) must be escaped"; error_message = "invalid string: control character " + codepoint_to_string(current) + " must be escaped";
return token_type::parse_error; return token_type::parse_error;
} }
@ -11480,7 +11487,7 @@ class basic_json
continue; continue;
} }
error_message = "invalid string: not well-formed UTF-8 byte"; error_message = "invalid string: ill-formed UTF-8 byte";
return token_type::parse_error; return token_type::parse_error;
} }
@ -11500,7 +11507,7 @@ class basic_json
} }
} }
error_message = "invalid string: not well-formed UTF-8 byte"; error_message = "invalid string: ill-formed UTF-8 byte";
return token_type::parse_error; return token_type::parse_error;
} }
@ -11534,7 +11541,7 @@ class basic_json
} }
} }
error_message = "invalid string: not well-formed UTF-8 byte"; error_message = "invalid string: ill-formed UTF-8 byte";
return token_type::parse_error; return token_type::parse_error;
} }
@ -11554,7 +11561,7 @@ class basic_json
} }
} }
error_message = "invalid string: not well-formed UTF-8 byte"; error_message = "invalid string: ill-formed UTF-8 byte";
return token_type::parse_error; return token_type::parse_error;
} }
@ -11579,7 +11586,7 @@ class basic_json
} }
} }
error_message = "invalid string: not well-formed UTF-8 byte"; error_message = "invalid string: ill-formed UTF-8 byte";
return token_type::parse_error; return token_type::parse_error;
} }
@ -11606,7 +11613,7 @@ class basic_json
} }
} }
error_message = "invalid string: not well-formed UTF-8 byte"; error_message = "invalid string: ill-formed UTF-8 byte";
return token_type::parse_error; return token_type::parse_error;
} }
@ -11631,14 +11638,14 @@ class basic_json
} }
} }
error_message = "invalid string: not well-formed UTF-8 byte"; error_message = "invalid string: ill-formed UTF-8 byte";
return token_type::parse_error; return token_type::parse_error;
} }
// remaining bytes (80..C1 and F5..FF) are not well-formed // remaining bytes (80..C1 and F5..FF) are ill-formed
default: default:
{ {
error_message = "invalid string: not well-formed UTF-8 byte"; error_message = "invalid string: ill-formed UTF-8 byte";
return token_type::parse_error; return token_type::parse_error;
} }
} }
@ -11681,7 +11688,7 @@ class basic_json
// be changed if minus sign, decimal point or exponent is read // be changed if minus sign, decimal point or exponent is read
token_type number_type = token_type::value_unsigned; token_type number_type = token_type::value_unsigned;
// state: we just found out we need to scan a number // state (init): we just found out we need to scan a number
switch (current) switch (current)
{ {
case '-': case '-':
@ -12001,6 +12008,8 @@ scan_number_done:
} }
} }
// this code is reached if we parse a floating-point number or if
// an integer conversion above failed
strtof(value_float, yytext.data(), nullptr); strtof(value_float, yytext.data(), nullptr);
return token_type::value_float; return token_type::value_float;
} }
@ -12064,7 +12073,8 @@ scan_number_done:
/// add a character to yytext /// add a character to yytext
void add(int c) void add(int c)
{ {
// resize yytext if necessary // resize yytext if necessary; this condition is deemed unlikely,
// because we start with a 1024-byte buffer
if (JSON_UNLIKELY((yylen + 1 > yytext.capacity()))) if (JSON_UNLIKELY((yylen + 1 > yytext.capacity())))
{ {
yytext.resize(2 * yytext.capacity(), '\0'); yytext.resize(2 * yytext.capacity(), '\0');
@ -12120,7 +12130,7 @@ scan_number_done:
std::string s = ia->read(start_pos, chars_read - start_pos); std::string s = ia->read(start_pos, chars_read - start_pos);
// escape control characters // escape control characters
std::stringstream ss; std::string result;
for (auto c : s) for (auto c : s)
{ {
if (c == '\0' or c == std::char_traits<char>::eof()) if (c == '\0' or c == std::char_traits<char>::eof())
@ -12131,16 +12141,16 @@ scan_number_done:
else if ('\x00' <= c and c <= '\x1f') else if ('\x00' <= c and c <= '\x1f')
{ {
// escape control characters // escape control characters
ss << "<U+" << std::setw(4) << std::setfill('0') << std::hex << int(c) << ">"; result += "<" + codepoint_to_string(c) + ">";
} }
else else
{ {
// add character as is // add character as is
ss << c; result.append(1, c);
} }
} }
return ss.str(); return result;
} }
/// return syntax error message /// return syntax error message
@ -12204,7 +12214,8 @@ scan_number_done:
case '9': case '9':
return scan_number(); return scan_number();
// end of input // end of input (the null byte is needed when parsing from
// string literals)
case '\0': case '\0':
case std::char_traits<char>::eof(): case std::char_traits<char>::eof():
return token_type::end_of_input; return token_type::end_of_input;

View file

@ -98,18 +98,18 @@ TEST_CASE("parser class")
// error: tab in string // error: tab in string
CHECK_THROWS_AS(parse_string("\"\t\"").parse(), json::parse_error); CHECK_THROWS_AS(parse_string("\"\t\"").parse(), json::parse_error);
CHECK_THROWS_WITH(parse_string("\"\t\"").parse(), CHECK_THROWS_WITH(parse_string("\"\t\"").parse(),
"[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"<U+0009>'"); "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character U+0009 must be escaped; last read '\"<U+0009>'");
// error: newline in string // error: newline in string
CHECK_THROWS_AS(parse_string("\"\n\"").parse(), json::parse_error); CHECK_THROWS_AS(parse_string("\"\n\"").parse(), json::parse_error);
CHECK_THROWS_AS(parse_string("\"\r\"").parse(), json::parse_error); CHECK_THROWS_AS(parse_string("\"\r\"").parse(), json::parse_error);
CHECK_THROWS_WITH(parse_string("\"\n\"").parse(), CHECK_THROWS_WITH(parse_string("\"\n\"").parse(),
"[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"<U+000a>'"); "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character U+000A must be escaped; last read '\"<U+000A>'");
CHECK_THROWS_WITH(parse_string("\"\r\"").parse(), CHECK_THROWS_WITH(parse_string("\"\r\"").parse(),
"[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"<U+000d>'"); "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character U+000D must be escaped; last read '\"<U+000D>'");
// error: backspace in string // error: backspace in string
CHECK_THROWS_AS(parse_string("\"\b\"").parse(), json::parse_error); CHECK_THROWS_AS(parse_string("\"\b\"").parse(), json::parse_error);
CHECK_THROWS_WITH(parse_string("\"\b\"").parse(), CHECK_THROWS_WITH(parse_string("\"\b\"").parse(),
"[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"<U+0008>'"); "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character U+0008 must be escaped; last read '\"<U+0008>'");
// improve code coverage // improve code coverage
CHECK_THROWS_AS(parse_string("\uFF01").parse(), json::parse_error); CHECK_THROWS_AS(parse_string("\uFF01").parse(), json::parse_error);
CHECK_THROWS_AS(parse_string("[-4:1,]").parse(), json::parse_error); CHECK_THROWS_AS(parse_string("[-4:1,]").parse(), json::parse_error);
@ -648,17 +648,17 @@ TEST_CASE("parser class")
// missing part of a surrogate pair // missing part of a surrogate pair
CHECK_THROWS_AS(json::parse("\"\\uD80C\""), json::parse_error); CHECK_THROWS_AS(json::parse("\"\\uD80C\""), json::parse_error);
CHECK_THROWS_WITH(json::parse("\"\\uD80C\""), CHECK_THROWS_WITH(json::parse("\"\\uD80C\""),
"[json.exception.parse_error.101] parse error at 8: syntax error - invalid string: missing low surrogate; last read '\"\\uD80C\"'"); "[json.exception.parse_error.101] parse error at 8: syntax error - invalid string: surrogate U+D80C must be followed by U+DC00..U+DFFF; last read '\"\\uD80C\"'");
// invalid surrogate pair // invalid surrogate pair
CHECK_THROWS_AS(json::parse("\"\\uD80C\\uD80C\""), json::parse_error); CHECK_THROWS_AS(json::parse("\"\\uD80C\\uD80C\""), json::parse_error);
CHECK_THROWS_AS(json::parse("\"\\uD80C\\u0000\""), json::parse_error); CHECK_THROWS_AS(json::parse("\"\\uD80C\\u0000\""), json::parse_error);
CHECK_THROWS_AS(json::parse("\"\\uD80C\\uFFFF\""), json::parse_error); CHECK_THROWS_AS(json::parse("\"\\uD80C\\uFFFF\""), json::parse_error);
CHECK_THROWS_WITH(json::parse("\"\\uD80C\\uD80C\""), CHECK_THROWS_WITH(json::parse("\"\\uD80C\\uD80C\""),
"[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\uD80C'"); "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: surrogate U+D80C must be followed by U+DC00..U+DFFF instead of U+D80C; last read '\"\\uD80C\\uD80C'");
CHECK_THROWS_WITH(json::parse("\"\\uD80C\\u0000\""), CHECK_THROWS_WITH(json::parse("\"\\uD80C\\u0000\""),
"[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\u0000'"); "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: surrogate U+D80C must be followed by U+DC00..U+DFFF instead of U+0000; last read '\"\\uD80C\\u0000'");
CHECK_THROWS_WITH(json::parse("\"\\uD80C\\uFFFF\""), CHECK_THROWS_WITH(json::parse("\"\\uD80C\\uFFFF\""),
"[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\uFFFF'"); "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: surrogate U+D80C must be followed by U+DC00..U+DFFF instead of U+FFFF; last read '\"\\uD80C\\uFFFF'");
} }
SECTION("tests found by mutate++") SECTION("tests found by mutate++")