🔨 fixed \uxxxx parsing

This commit is contained in:
Niels Lohmann 2017-03-26 15:29:08 +02:00
parent f47cf152a8
commit 99e0d8b339
No known key found for this signature in database
GPG key ID: 7F3CEA63AE251B69
2 changed files with 90 additions and 62 deletions

View file

@ -42,6 +42,7 @@ SOFTWARE.
#include <forward_list> // forward_list #include <forward_list> // forward_list
#include <functional> // function, hash, less #include <functional> // function, hash, less
#include <initializer_list> // initializer_list #include <initializer_list> // initializer_list
#include <iomanip> // hex
#include <iostream> // istream, ostream #include <iostream> // istream, ostream
#include <iterator> // advance, begin, back_inserter, bidirectional_iterator_tag, distance, end, inserter, iterator, iterator_traits, next, random_access_iterator_tag, reverse_iterator #include <iterator> // advance, begin, back_inserter, bidirectional_iterator_tag, distance, end, inserter, iterator, iterator_traits, next, random_access_iterator_tag, reverse_iterator
#include <limits> // numeric_limits #include <limits> // numeric_limits
@ -10542,37 +10543,31 @@ class basic_json
// must be called after \u was read; returns following xxxx as hex or -1 when error // must be called after \u was read; returns following xxxx as hex or -1 when error
int get_codepoint() int get_codepoint()
{ {
// read xxxx of \uxxxx // a mapping to discover hex numbers
std::vector<char> buffer(5, '\0'); static int8_t ascii_to_hex[256] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
int codepoint = 0;
for (size_t i = 0; i < 4; ++i) for (size_t i = 0; i < 4; ++i)
{ {
get(); const int8_t digit = ascii_to_hex[static_cast<unsigned char>(get())];
if (JSON_UNLIKELY(current != std::char_traits<char>::eof())) if (JSON_UNLIKELY(digit == -1))
{ {
buffer[i] = static_cast<char>(current); return -1;
} }
else else
{ {
// error message will be created by caller codepoint += digit;
return -1;
}
} }
char* endptr; if (i != 3)
errno = 0;
const int codepoint = static_cast<int>(std::strtoul(buffer.data(), &endptr, 16));
if (JSON_LIKELY(errno == 0 and endptr == buffer.data() + 4))
{ {
codepoint <<= 4;
}
}
return codepoint; return codepoint;
} }
else
{
// conversion incomplete or failure
return -1;
}
}
token_type scan_string() token_type scan_string()
{ {

View file

@ -91,18 +91,18 @@ TEST_CASE("parser class")
// error: tab in string // error: tab in string
CHECK_THROWS_AS(json::parser("\"\t\"").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("\"\t\"").parse(), json::parse_error);
CHECK_THROWS_WITH(json::parser("\"\t\"").parse(), CHECK_THROWS_WITH(json::parser("\"\t\"").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"<U+0009>'");
// error: newline in string // error: newline in string
CHECK_THROWS_AS(json::parser("\"\n\"").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("\"\n\"").parse(), json::parse_error);
CHECK_THROWS_AS(json::parser("\"\r\"").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("\"\r\"").parse(), json::parse_error);
CHECK_THROWS_WITH(json::parser("\"\n\"").parse(), CHECK_THROWS_WITH(json::parser("\"\n\"").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"<U+000a>'");
CHECK_THROWS_WITH(json::parser("\"\r\"").parse(), CHECK_THROWS_WITH(json::parser("\"\r\"").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"<U+000d>'");
// error: backspace in string // error: backspace in string
CHECK_THROWS_AS(json::parser("\"\b\"").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("\"\b\"").parse(), json::parse_error);
CHECK_THROWS_WITH(json::parser("\"\b\"").parse(), CHECK_THROWS_WITH(json::parser("\"\b\"").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"<U+0008>'");
// improve code coverage // improve code coverage
CHECK_THROWS_AS(json::parser("\uFF01").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("\uFF01").parse(), json::parse_error);
CHECK_THROWS_AS(json::parser("[-4:1,]").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("[-4:1,]").parse(), json::parse_error);
@ -393,34 +393,37 @@ TEST_CASE("parser class")
CHECK_THROWS_AS(json::parser("n").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("n").parse(), json::parse_error);
CHECK_THROWS_AS(json::parser("nu").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("nu").parse(), json::parse_error);
CHECK_THROWS_AS(json::parser("nul").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("nul").parse(), json::parse_error);
CHECK_THROWS_WITH(json::parser("n").parse(), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'n'"); CHECK_THROWS_WITH(json::parser("n").parse(),
"[json.exception.parse_error.101] parse error at 2: syntax error - invalid literal; expected 'null'; last read 'n'");
CHECK_THROWS_WITH(json::parser("nu").parse(), CHECK_THROWS_WITH(json::parser("nu").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'n'"); "[json.exception.parse_error.101] parse error at 3: syntax error - invalid literal; expected 'null'; last read 'nu'");
CHECK_THROWS_WITH(json::parser("nul").parse(), CHECK_THROWS_WITH(json::parser("nul").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'n'"); "[json.exception.parse_error.101] parse error at 4: syntax error - invalid literal; expected 'null'; last read 'nul'");
// unexpected end of true // unexpected end of true
CHECK_THROWS_AS(json::parser("t").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("t").parse(), json::parse_error);
CHECK_THROWS_AS(json::parser("tr").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("tr").parse(), json::parse_error);
CHECK_THROWS_AS(json::parser("tru").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("tru").parse(), json::parse_error);
CHECK_THROWS_WITH(json::parser("t").parse(), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 't'"); CHECK_THROWS_WITH(json::parser("t").parse(),
"[json.exception.parse_error.101] parse error at 2: syntax error - invalid literal; expected 'true'; last read 't'");
CHECK_THROWS_WITH(json::parser("tr").parse(), CHECK_THROWS_WITH(json::parser("tr").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 't'"); "[json.exception.parse_error.101] parse error at 3: syntax error - invalid literal; expected 'true'; last read 'tr'");
CHECK_THROWS_WITH(json::parser("tru").parse(), CHECK_THROWS_WITH(json::parser("tru").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 't'"); "[json.exception.parse_error.101] parse error at 4: syntax error - invalid literal; expected 'true'; last read 'tru'");
// unexpected end of false // unexpected end of false
CHECK_THROWS_AS(json::parser("f").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("f").parse(), json::parse_error);
CHECK_THROWS_AS(json::parser("fa").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("fa").parse(), json::parse_error);
CHECK_THROWS_AS(json::parser("fal").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("fal").parse(), json::parse_error);
CHECK_THROWS_AS(json::parser("fals").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("fals").parse(), json::parse_error);
CHECK_THROWS_WITH(json::parser("f").parse(), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'"); CHECK_THROWS_WITH(json::parser("f").parse(),
"[json.exception.parse_error.101] parse error at 2: syntax error - invalid literal; expected 'false'; last read 'f'");
CHECK_THROWS_WITH(json::parser("fa").parse(), CHECK_THROWS_WITH(json::parser("fa").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'"); "[json.exception.parse_error.101] parse error at 3: syntax error - invalid literal; expected 'false'; last read 'fa'");
CHECK_THROWS_WITH(json::parser("fal").parse(), CHECK_THROWS_WITH(json::parser("fal").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'"); "[json.exception.parse_error.101] parse error at 4: syntax error - invalid literal; expected 'false'; last read 'fal'");
CHECK_THROWS_WITH(json::parser("fals").parse(), CHECK_THROWS_WITH(json::parser("fals").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'"); "[json.exception.parse_error.101] parse error at 5: syntax error - invalid literal; expected 'false'; last read 'fals'");
// missing/unexpected end of array // missing/unexpected end of array
CHECK_THROWS_AS(json::parser("[").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("[").parse(), json::parse_error);
@ -471,25 +474,25 @@ TEST_CASE("parser class")
CHECK_THROWS_AS(json::parser("\"\\u01").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("\"\\u01").parse(), json::parse_error);
CHECK_THROWS_AS(json::parser("\"\\u012").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("\"\\u012").parse(), json::parse_error);
CHECK_THROWS_WITH(json::parser("\"").parse(), CHECK_THROWS_WITH(json::parser("\"").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: missing closing quote; last read '\"'");
CHECK_THROWS_WITH(json::parser("\"\\\"").parse(), CHECK_THROWS_WITH(json::parser("\"\\\"").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: missing closing quote; last read '\"\\\"'");
CHECK_THROWS_WITH(json::parser("\"\\u\"").parse(), CHECK_THROWS_WITH(json::parser("\"\\u\"").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u\"'");
CHECK_THROWS_WITH(json::parser("\"\\u0\"").parse(), CHECK_THROWS_WITH(json::parser("\"\\u0\"").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 5: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u0\"'");
CHECK_THROWS_WITH(json::parser("\"\\u01\"").parse(), CHECK_THROWS_WITH(json::parser("\"\\u01\"").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 6: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u01\"'");
CHECK_THROWS_WITH(json::parser("\"\\u012\"").parse(), CHECK_THROWS_WITH(json::parser("\"\\u012\"").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 7: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u012\"'");
CHECK_THROWS_WITH(json::parser("\"\\u").parse(), CHECK_THROWS_WITH(json::parser("\"\\u").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u'");
CHECK_THROWS_WITH(json::parser("\"\\u0").parse(), CHECK_THROWS_WITH(json::parser("\"\\u0").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 5: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u0'");
CHECK_THROWS_WITH(json::parser("\"\\u01").parse(), CHECK_THROWS_WITH(json::parser("\"\\u01").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 6: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u01'");
CHECK_THROWS_WITH(json::parser("\"\\u012").parse(), CHECK_THROWS_WITH(json::parser("\"\\u012").parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 7: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u012'");
// invalid escapes // invalid escapes
for (int c = 1; c < 128; ++c) for (int c = 1; c < 128; ++c)
@ -522,8 +525,12 @@ TEST_CASE("parser class")
default: default:
{ {
CHECK_THROWS_AS(json::parser(s.c_str()).parse(), json::parse_error); CHECK_THROWS_AS(json::parser(s.c_str()).parse(), json::parse_error);
// only check error message if c is not a control character
if (c > 0x1f)
{
CHECK_THROWS_WITH(json::parser(s.c_str()).parse(), CHECK_THROWS_WITH(json::parser(s.c_str()).parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 3: syntax error - invalid string: forbidden character after backspace; last read '\"\\" + std::string(1, c) + "'");
}
break; break;
} }
} }
@ -581,26 +588,52 @@ TEST_CASE("parser class")
if (valid(c)) if (valid(c))
{ {
CAPTURE(s1);
CHECK_NOTHROW(json::parser(s1.c_str()).parse()); CHECK_NOTHROW(json::parser(s1.c_str()).parse());
CAPTURE(s2);
CHECK_NOTHROW(json::parser(s2.c_str()).parse()); CHECK_NOTHROW(json::parser(s2.c_str()).parse());
CAPTURE(s3);
CHECK_NOTHROW(json::parser(s3.c_str()).parse()); CHECK_NOTHROW(json::parser(s3.c_str()).parse());
CAPTURE(s4);
CHECK_NOTHROW(json::parser(s4.c_str()).parse()); CHECK_NOTHROW(json::parser(s4.c_str()).parse());
} }
else else
{ {
CAPTURE(s1);
CHECK_THROWS_AS(json::parser(s1.c_str()).parse(), json::parse_error); CHECK_THROWS_AS(json::parser(s1.c_str()).parse(), json::parse_error);
CHECK_THROWS_AS(json::parser(s2.c_str()).parse(), json::parse_error); // only check error message if c is not a control character
CHECK_THROWS_AS(json::parser(s3.c_str()).parse(), json::parse_error); if (c > 0x1f)
CHECK_THROWS_AS(json::parser(s4.c_str()).parse(), json::parse_error); {
CHECK_THROWS_WITH(json::parser(s1.c_str()).parse(), CHECK_THROWS_WITH(json::parser(s1.c_str()).parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 7: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s1.substr(0, 7) + "'");
}
CAPTURE(s2);
CHECK_THROWS_AS(json::parser(s2.c_str()).parse(), json::parse_error);
// only check error message if c is not a control character
if (c > 0x1f)
{
CHECK_THROWS_WITH(json::parser(s2.c_str()).parse(), CHECK_THROWS_WITH(json::parser(s2.c_str()).parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 6: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s2.substr(0, 6) + "'");
}
CAPTURE(s3);
CHECK_THROWS_AS(json::parser(s3.c_str()).parse(), json::parse_error);
// only check error message if c is not a control character
if (c > 0x1f)
{
CHECK_THROWS_WITH(json::parser(s3.c_str()).parse(), CHECK_THROWS_WITH(json::parser(s3.c_str()).parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 5: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s3.substr(0, 5) + "'");
}
CAPTURE(s4);
CHECK_THROWS_AS(json::parser(s4.c_str()).parse(), json::parse_error);
// only check error message if c is not a control character
if (c > 0x1f)
{
CHECK_THROWS_WITH(json::parser(s4.c_str()).parse(), CHECK_THROWS_WITH(json::parser(s4.c_str()).parse(),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); "[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s4.substr(0, 4) + "'");
}
} }
} }
} }
@ -608,17 +641,17 @@ TEST_CASE("parser class")
// missing part of a surrogate pair // missing part of a surrogate pair
CHECK_THROWS_AS(json::parse("\"\\uD80C\""), json::parse_error); CHECK_THROWS_AS(json::parse("\"\\uD80C\""), json::parse_error);
CHECK_THROWS_WITH(json::parse("\"\\uD80C\""), CHECK_THROWS_WITH(json::parse("\"\\uD80C\""),
"[json.exception.parse_error.102] parse error at 8: missing low surrogate"); "[json.exception.parse_error.101] parse error at 8: syntax error - invalid string: missing low surrogate; last read '\"\\uD80C\"'");
// invalid surrogate pair // invalid surrogate pair
CHECK_THROWS_AS(json::parse("\"\\uD80C\\uD80C\""), json::parse_error); CHECK_THROWS_AS(json::parse("\"\\uD80C\\uD80C\""), json::parse_error);
CHECK_THROWS_AS(json::parse("\"\\uD80C\\u0000\""), json::parse_error); CHECK_THROWS_AS(json::parse("\"\\uD80C\\u0000\""), json::parse_error);
CHECK_THROWS_AS(json::parse("\"\\uD80C\\uFFFF\""), json::parse_error); CHECK_THROWS_AS(json::parse("\"\\uD80C\\uFFFF\""), json::parse_error);
CHECK_THROWS_WITH(json::parse("\"\\uD80C\\uD80C\""), CHECK_THROWS_WITH(json::parse("\"\\uD80C\\uD80C\""),
"[json.exception.parse_error.102] parse error at 14: missing or wrong low surrogate"); "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\uD80C'");
CHECK_THROWS_WITH(json::parse("\"\\uD80C\\u0000\""), CHECK_THROWS_WITH(json::parse("\"\\uD80C\\u0000\""),
"[json.exception.parse_error.102] parse error at 14: missing or wrong low surrogate"); "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\u0000'");
CHECK_THROWS_WITH(json::parse("\"\\uD80C\\uFFFF\""), CHECK_THROWS_WITH(json::parse("\"\\uD80C\\uFFFF\""),
"[json.exception.parse_error.102] parse error at 14: missing or wrong low surrogate"); "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\uFFFF'");
} }
SECTION("tests found by mutate++") SECTION("tests found by mutate++")