🔨 fixed \uxxxx parsing
This commit is contained in:
parent
f47cf152a8
commit
99e0d8b339
2 changed files with 90 additions and 62 deletions
37
src/json.hpp
37
src/json.hpp
|
@ -42,6 +42,7 @@ SOFTWARE.
|
|||
#include <forward_list> // forward_list
|
||||
#include <functional> // function, hash, less
|
||||
#include <initializer_list> // initializer_list
|
||||
#include <iomanip> // hex
|
||||
#include <iostream> // istream, ostream
|
||||
#include <iterator> // advance, begin, back_inserter, bidirectional_iterator_tag, distance, end, inserter, iterator, iterator_traits, next, random_access_iterator_tag, reverse_iterator
|
||||
#include <limits> // numeric_limits
|
||||
|
@ -10542,36 +10543,30 @@ class basic_json
|
|||
// must be called after \u was read; returns following xxxx as hex or -1 when error
|
||||
int get_codepoint()
|
||||
{
|
||||
// read xxxx of \uxxxx
|
||||
std::vector<char> buffer(5, '\0');
|
||||
// a mapping to discover hex numbers
|
||||
static int8_t ascii_to_hex[256] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
|
||||
|
||||
int codepoint = 0;
|
||||
|
||||
for (size_t i = 0; i < 4; ++i)
|
||||
{
|
||||
get();
|
||||
if (JSON_UNLIKELY(current != std::char_traits<char>::eof()))
|
||||
const int8_t digit = ascii_to_hex[static_cast<unsigned char>(get())];
|
||||
if (JSON_UNLIKELY(digit == -1))
|
||||
{
|
||||
buffer[i] = static_cast<char>(current);
|
||||
return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
// error message will be created by caller
|
||||
return -1;
|
||||
codepoint += digit;
|
||||
}
|
||||
|
||||
if (i != 3)
|
||||
{
|
||||
codepoint <<= 4;
|
||||
}
|
||||
}
|
||||
|
||||
char* endptr;
|
||||
errno = 0;
|
||||
|
||||
const int codepoint = static_cast<int>(std::strtoul(buffer.data(), &endptr, 16));
|
||||
|
||||
if (JSON_LIKELY(errno == 0 and endptr == buffer.data() + 4))
|
||||
{
|
||||
return codepoint;
|
||||
}
|
||||
else
|
||||
{
|
||||
// conversion incomplete or failure
|
||||
return -1;
|
||||
}
|
||||
return codepoint;
|
||||
}
|
||||
|
||||
token_type scan_string()
|
||||
|
|
|
@ -91,18 +91,18 @@ TEST_CASE("parser class")
|
|||
// error: tab in string
|
||||
CHECK_THROWS_AS(json::parser("\"\t\"").parse(), json::parse_error);
|
||||
CHECK_THROWS_WITH(json::parser("\"\t\"").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
"[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"<U+0009>'");
|
||||
// error: newline in string
|
||||
CHECK_THROWS_AS(json::parser("\"\n\"").parse(), json::parse_error);
|
||||
CHECK_THROWS_AS(json::parser("\"\r\"").parse(), json::parse_error);
|
||||
CHECK_THROWS_WITH(json::parser("\"\n\"").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
"[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"<U+000a>'");
|
||||
CHECK_THROWS_WITH(json::parser("\"\r\"").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
"[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"<U+000d>'");
|
||||
// error: backspace in string
|
||||
CHECK_THROWS_AS(json::parser("\"\b\"").parse(), json::parse_error);
|
||||
CHECK_THROWS_WITH(json::parser("\"\b\"").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
"[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"<U+0008>'");
|
||||
// improve code coverage
|
||||
CHECK_THROWS_AS(json::parser("\uFF01").parse(), json::parse_error);
|
||||
CHECK_THROWS_AS(json::parser("[-4:1,]").parse(), json::parse_error);
|
||||
|
@ -393,34 +393,37 @@ TEST_CASE("parser class")
|
|||
CHECK_THROWS_AS(json::parser("n").parse(), json::parse_error);
|
||||
CHECK_THROWS_AS(json::parser("nu").parse(), json::parse_error);
|
||||
CHECK_THROWS_AS(json::parser("nul").parse(), json::parse_error);
|
||||
CHECK_THROWS_WITH(json::parser("n").parse(), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'n'");
|
||||
CHECK_THROWS_WITH(json::parser("n").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 2: syntax error - invalid literal; expected 'null'; last read 'n'");
|
||||
CHECK_THROWS_WITH(json::parser("nu").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'n'");
|
||||
"[json.exception.parse_error.101] parse error at 3: syntax error - invalid literal; expected 'null'; last read 'nu'");
|
||||
CHECK_THROWS_WITH(json::parser("nul").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'n'");
|
||||
"[json.exception.parse_error.101] parse error at 4: syntax error - invalid literal; expected 'null'; last read 'nul'");
|
||||
|
||||
// unexpected end of true
|
||||
CHECK_THROWS_AS(json::parser("t").parse(), json::parse_error);
|
||||
CHECK_THROWS_AS(json::parser("tr").parse(), json::parse_error);
|
||||
CHECK_THROWS_AS(json::parser("tru").parse(), json::parse_error);
|
||||
CHECK_THROWS_WITH(json::parser("t").parse(), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 't'");
|
||||
CHECK_THROWS_WITH(json::parser("t").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 2: syntax error - invalid literal; expected 'true'; last read 't'");
|
||||
CHECK_THROWS_WITH(json::parser("tr").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 't'");
|
||||
"[json.exception.parse_error.101] parse error at 3: syntax error - invalid literal; expected 'true'; last read 'tr'");
|
||||
CHECK_THROWS_WITH(json::parser("tru").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 't'");
|
||||
"[json.exception.parse_error.101] parse error at 4: syntax error - invalid literal; expected 'true'; last read 'tru'");
|
||||
|
||||
// unexpected end of false
|
||||
CHECK_THROWS_AS(json::parser("f").parse(), json::parse_error);
|
||||
CHECK_THROWS_AS(json::parser("fa").parse(), json::parse_error);
|
||||
CHECK_THROWS_AS(json::parser("fal").parse(), json::parse_error);
|
||||
CHECK_THROWS_AS(json::parser("fals").parse(), json::parse_error);
|
||||
CHECK_THROWS_WITH(json::parser("f").parse(), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'");
|
||||
CHECK_THROWS_WITH(json::parser("f").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 2: syntax error - invalid literal; expected 'false'; last read 'f'");
|
||||
CHECK_THROWS_WITH(json::parser("fa").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'");
|
||||
"[json.exception.parse_error.101] parse error at 3: syntax error - invalid literal; expected 'false'; last read 'fa'");
|
||||
CHECK_THROWS_WITH(json::parser("fal").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'");
|
||||
"[json.exception.parse_error.101] parse error at 4: syntax error - invalid literal; expected 'false'; last read 'fal'");
|
||||
CHECK_THROWS_WITH(json::parser("fals").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'");
|
||||
"[json.exception.parse_error.101] parse error at 5: syntax error - invalid literal; expected 'false'; last read 'fals'");
|
||||
|
||||
// missing/unexpected end of array
|
||||
CHECK_THROWS_AS(json::parser("[").parse(), json::parse_error);
|
||||
|
@ -471,25 +474,25 @@ TEST_CASE("parser class")
|
|||
CHECK_THROWS_AS(json::parser("\"\\u01").parse(), json::parse_error);
|
||||
CHECK_THROWS_AS(json::parser("\"\\u012").parse(), json::parse_error);
|
||||
CHECK_THROWS_WITH(json::parser("\"").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
"[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: missing closing quote; last read '\"'");
|
||||
CHECK_THROWS_WITH(json::parser("\"\\\"").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
"[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: missing closing quote; last read '\"\\\"'");
|
||||
CHECK_THROWS_WITH(json::parser("\"\\u\"").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
"[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u\"'");
|
||||
CHECK_THROWS_WITH(json::parser("\"\\u0\"").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
"[json.exception.parse_error.101] parse error at 5: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u0\"'");
|
||||
CHECK_THROWS_WITH(json::parser("\"\\u01\"").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
"[json.exception.parse_error.101] parse error at 6: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u01\"'");
|
||||
CHECK_THROWS_WITH(json::parser("\"\\u012\"").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
"[json.exception.parse_error.101] parse error at 7: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u012\"'");
|
||||
CHECK_THROWS_WITH(json::parser("\"\\u").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
"[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u'");
|
||||
CHECK_THROWS_WITH(json::parser("\"\\u0").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
"[json.exception.parse_error.101] parse error at 5: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u0'");
|
||||
CHECK_THROWS_WITH(json::parser("\"\\u01").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
"[json.exception.parse_error.101] parse error at 6: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u01'");
|
||||
CHECK_THROWS_WITH(json::parser("\"\\u012").parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
"[json.exception.parse_error.101] parse error at 7: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u012'");
|
||||
|
||||
// invalid escapes
|
||||
for (int c = 1; c < 128; ++c)
|
||||
|
@ -522,8 +525,12 @@ TEST_CASE("parser class")
|
|||
default:
|
||||
{
|
||||
CHECK_THROWS_AS(json::parser(s.c_str()).parse(), json::parse_error);
|
||||
CHECK_THROWS_WITH(json::parser(s.c_str()).parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
// only check error message if c is not a control character
|
||||
if (c > 0x1f)
|
||||
{
|
||||
CHECK_THROWS_WITH(json::parser(s.c_str()).parse(),
|
||||
"[json.exception.parse_error.101] parse error at 3: syntax error - invalid string: forbidden character after backspace; last read '\"\\" + std::string(1, c) + "'");
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -581,26 +588,52 @@ TEST_CASE("parser class")
|
|||
|
||||
if (valid(c))
|
||||
{
|
||||
CAPTURE(s1);
|
||||
CHECK_NOTHROW(json::parser(s1.c_str()).parse());
|
||||
CAPTURE(s2);
|
||||
CHECK_NOTHROW(json::parser(s2.c_str()).parse());
|
||||
CAPTURE(s3);
|
||||
CHECK_NOTHROW(json::parser(s3.c_str()).parse());
|
||||
CAPTURE(s4);
|
||||
CHECK_NOTHROW(json::parser(s4.c_str()).parse());
|
||||
}
|
||||
else
|
||||
{
|
||||
CAPTURE(s1);
|
||||
CHECK_THROWS_AS(json::parser(s1.c_str()).parse(), json::parse_error);
|
||||
CHECK_THROWS_AS(json::parser(s2.c_str()).parse(), json::parse_error);
|
||||
CHECK_THROWS_AS(json::parser(s3.c_str()).parse(), json::parse_error);
|
||||
CHECK_THROWS_AS(json::parser(s4.c_str()).parse(), json::parse_error);
|
||||
// only check error message if c is not a control character
|
||||
if (c > 0x1f)
|
||||
{
|
||||
CHECK_THROWS_WITH(json::parser(s1.c_str()).parse(),
|
||||
"[json.exception.parse_error.101] parse error at 7: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s1.substr(0, 7) + "'");
|
||||
}
|
||||
|
||||
CHECK_THROWS_WITH(json::parser(s1.c_str()).parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
CHECK_THROWS_WITH(json::parser(s2.c_str()).parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
CHECK_THROWS_WITH(json::parser(s3.c_str()).parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
CHECK_THROWS_WITH(json::parser(s4.c_str()).parse(),
|
||||
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'");
|
||||
CAPTURE(s2);
|
||||
CHECK_THROWS_AS(json::parser(s2.c_str()).parse(), json::parse_error);
|
||||
// only check error message if c is not a control character
|
||||
if (c > 0x1f)
|
||||
{
|
||||
CHECK_THROWS_WITH(json::parser(s2.c_str()).parse(),
|
||||
"[json.exception.parse_error.101] parse error at 6: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s2.substr(0, 6) + "'");
|
||||
}
|
||||
|
||||
CAPTURE(s3);
|
||||
CHECK_THROWS_AS(json::parser(s3.c_str()).parse(), json::parse_error);
|
||||
// only check error message if c is not a control character
|
||||
if (c > 0x1f)
|
||||
{
|
||||
CHECK_THROWS_WITH(json::parser(s3.c_str()).parse(),
|
||||
"[json.exception.parse_error.101] parse error at 5: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s3.substr(0, 5) + "'");
|
||||
}
|
||||
|
||||
CAPTURE(s4);
|
||||
CHECK_THROWS_AS(json::parser(s4.c_str()).parse(), json::parse_error);
|
||||
// only check error message if c is not a control character
|
||||
if (c > 0x1f)
|
||||
{
|
||||
CHECK_THROWS_WITH(json::parser(s4.c_str()).parse(),
|
||||
"[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s4.substr(0, 4) + "'");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -608,17 +641,17 @@ TEST_CASE("parser class")
|
|||
// missing part of a surrogate pair
|
||||
CHECK_THROWS_AS(json::parse("\"\\uD80C\""), json::parse_error);
|
||||
CHECK_THROWS_WITH(json::parse("\"\\uD80C\""),
|
||||
"[json.exception.parse_error.102] parse error at 8: missing low surrogate");
|
||||
"[json.exception.parse_error.101] parse error at 8: syntax error - invalid string: missing low surrogate; last read '\"\\uD80C\"'");
|
||||
// invalid surrogate pair
|
||||
CHECK_THROWS_AS(json::parse("\"\\uD80C\\uD80C\""), json::parse_error);
|
||||
CHECK_THROWS_AS(json::parse("\"\\uD80C\\u0000\""), json::parse_error);
|
||||
CHECK_THROWS_AS(json::parse("\"\\uD80C\\uFFFF\""), json::parse_error);
|
||||
CHECK_THROWS_WITH(json::parse("\"\\uD80C\\uD80C\""),
|
||||
"[json.exception.parse_error.102] parse error at 14: missing or wrong low surrogate");
|
||||
"[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\uD80C'");
|
||||
CHECK_THROWS_WITH(json::parse("\"\\uD80C\\u0000\""),
|
||||
"[json.exception.parse_error.102] parse error at 14: missing or wrong low surrogate");
|
||||
"[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\u0000'");
|
||||
CHECK_THROWS_WITH(json::parse("\"\\uD80C\\uFFFF\""),
|
||||
"[json.exception.parse_error.102] parse error at 14: missing or wrong low surrogate");
|
||||
"[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\uFFFF'");
|
||||
}
|
||||
|
||||
SECTION("tests found by mutate++")
|
||||
|
|
Loading…
Reference in a new issue