Fix character skipping after a surrogate pair
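In a string, the first character following a surrogate pair was skipped by the lexer, while the rest of the string was parsed as usual. The lexer advanced 11 characters past the pair instead of 10 (xxxx\uyyyy); this commit corrects the skip count.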
This commit is contained in:
parent
3948630374
commit
ec7a1d8347
3 changed files with 9 additions and 4 deletions
|
@ -6856,8 +6856,8 @@ basic_json_parser_59:
|
||||||
auto codepoint2 = std::strtoul(std::string(reinterpret_cast<typename string_t::const_pointer>
|
auto codepoint2 = std::strtoul(std::string(reinterpret_cast<typename string_t::const_pointer>
|
||||||
(i + 7), 4).c_str(), nullptr, 16);
|
(i + 7), 4).c_str(), nullptr, 16);
|
||||||
result += to_unicode(codepoint, codepoint2);
|
result += to_unicode(codepoint, codepoint2);
|
||||||
// skip the next 11 characters (xxxx\uyyyy)
|
// skip the next 10 characters (xxxx\uyyyy)
|
||||||
i += 11;
|
i += 10;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -6162,8 +6162,8 @@ class basic_json
|
||||||
auto codepoint2 = std::strtoul(std::string(reinterpret_cast<typename string_t::const_pointer>
|
auto codepoint2 = std::strtoul(std::string(reinterpret_cast<typename string_t::const_pointer>
|
||||||
(i + 7), 4).c_str(), nullptr, 16);
|
(i + 7), 4).c_str(), nullptr, 16);
|
||||||
result += to_unicode(codepoint, codepoint2);
|
result += to_unicode(codepoint, codepoint2);
|
||||||
// skip the next 11 characters (xxxx\uyyyy)
|
// skip the next 10 characters (xxxx\uyyyy)
|
||||||
i += 11;
|
i += 10;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -10205,4 +10205,9 @@ TEST_CASE("regression tests")
|
||||||
j["string"] = bytes;
|
j["string"] = bytes;
|
||||||
CHECK(j["string"] == "\u0007\u0007");
|
CHECK(j["string"] == "\u0007\u0007");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SECTION("character following a surrogate pair is skipped")
|
||||||
|
{
|
||||||
|
CHECK(json::parse("\"\\ud80c\\udc60abc\"").get<json::string_t>() == u8"\U00013060abc");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue