From ec7a1d834773f9fee90d8ae908a0c9933c5646fc Mon Sep 17 00:00:00 2001
From: Robert Marki <gsmiko@gmail.com>
Date: Fri, 13 Nov 2015 12:49:26 +0100
Subject: [PATCH] Fix character skipping after a surrogate pair

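In a string, the first character following a surrogate pair is skipped by the
lexer, but the rest of the string is parsed as usual. The remainder of the
escape sequence (xxxx\uyyyy) is 10 characters long, not 11, so the lexer must
advance by 10 instead of 11.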
---
 src/json.hpp      | 4 ++--
 src/json.hpp.re2c | 4 ++--
 test/unit.cpp     | 5 +++++
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/json.hpp b/src/json.hpp
index 4423c28c..1e3cd11f 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -6856,8 +6856,8 @@ basic_json_parser_59:
                                 auto codepoint2 = std::strtoul(std::string(reinterpret_cast<typename string_t::const_pointer>
                                                                (i + 7), 4).c_str(), nullptr, 16);
                                 result += to_unicode(codepoint, codepoint2);
-                                // skip the next 11 characters (xxxx\uyyyy)
-                                i += 11;
+                                // skip the next 10 characters (xxxx\uyyyy)
+                                i += 10;
                             }
                             else
                             {
diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c
index 2fa1a525..84559240 100644
--- a/src/json.hpp.re2c
+++ b/src/json.hpp.re2c
@@ -6162,8 +6162,8 @@ class basic_json
                                 auto codepoint2 = std::strtoul(std::string(reinterpret_cast<typename string_t::const_pointer>
                                                                (i + 7), 4).c_str(), nullptr, 16);
                                 result += to_unicode(codepoint, codepoint2);
-                                // skip the next 11 characters (xxxx\uyyyy)
-                                i += 11;
+                                // skip the next 10 characters (xxxx\uyyyy)
+                                i += 10;
                             }
                             else
                             {
diff --git a/test/unit.cpp b/test/unit.cpp
index c9c1d2e0..86f3a1ce 100644
--- a/test/unit.cpp
+++ b/test/unit.cpp
@@ -10205,4 +10205,9 @@ TEST_CASE("regression tests")
         j["string"] = bytes;
         CHECK(j["string"] == "\u0007\u0007");
     }
+
+    SECTION("character following a surrogate pair is skipped")
+    {
+        CHECK(json::parse("\"\\ud80c\\udc60abc\"").get<json::string_t>() == u8"\U00013060abc");
+    }
 }