From ec7a1d834773f9fee90d8ae908a0c9933c5646fc Mon Sep 17 00:00:00 2001 From: Robert Marki Date: Fri, 13 Nov 2015 12:49:26 +0100 Subject: [PATCH] Fix character skipping after a surrogate pair In a string the first character following a surrogate pair is skipped by the lexer, but the rest of the string is parsed as usual. --- src/json.hpp | 4 ++-- src/json.hpp.re2c | 4 ++-- test/unit.cpp | 5 +++++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 4423c28c..1e3cd11f 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -6856,8 +6856,8 @@ basic_json_parser_59: auto codepoint2 = std::strtoul(std::string(reinterpret_cast<typename string_t::const_pointer> (i + 7), 4).c_str(), nullptr, 16); result += to_unicode(codepoint, codepoint2); - // skip the next 11 characters (xxxx\uyyyy) - i += 11; + // skip the next 10 characters (xxxx\uyyyy) + i += 10; } else { diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index 2fa1a525..84559240 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -6162,8 +6162,8 @@ class basic_json auto codepoint2 = std::strtoul(std::string(reinterpret_cast<typename string_t::const_pointer> (i + 7), 4).c_str(), nullptr, 16); result += to_unicode(codepoint, codepoint2); - // skip the next 11 characters (xxxx\uyyyy) - i += 11; + // skip the next 10 characters (xxxx\uyyyy) + i += 10; } else { diff --git a/test/unit.cpp b/test/unit.cpp index c9c1d2e0..86f3a1ce 100644 --- a/test/unit.cpp +++ b/test/unit.cpp @@ -10205,4 +10205,9 @@ TEST_CASE("regression tests") j["string"] = bytes; CHECK(j["string"] == "\u0007\u0007"); } + + SECTION("character following a surrogate pair is skipped") + { + CHECK(json::parse("\"\\ud80c\\udc60abc\"").get<json::string_t>() == u8"\U00013060abc"); + } }