From 6d2c0a79287c8013f16c2674d37f52a4ccd93be6 Mon Sep 17 00:00:00 2001
From: Niels Lohmann <mail@nlohmann.me>
Date: Sun, 23 Apr 2017 22:54:21 +0200
Subject: [PATCH] :white_check_mark: added more Unicode test cases

---
 test/src/unit-unicode.cpp | 1237 +++++++++++++++++++------------------
 1 file changed, 631 insertions(+), 606 deletions(-)

diff --git a/test/src/unit-unicode.cpp b/test/src/unit-unicode.cpp
index 7b94db67..67e97346 100644
--- a/test/src/unit-unicode.cpp
+++ b/test/src/unit-unicode.cpp
@@ -74,523 +74,525 @@ void check_utf8string(bool success_expected, int byte1, int byte2 = -1, int byte
     }
 }
 
-TEST_CASE("RFC 3629", "[hide]")
+TEST_CASE("Unicode", "[hide]")
 {
-    /*
-    RFC 3629 describes in Sect. 4 the syntax of UTF-8 byte sequences as
-    follows:
-
-        A UTF-8 string is a sequence of octets representing a sequence of UCS
-        characters.  An octet sequence is valid UTF-8 only if it matches the
-        following syntax, which is derived from the rules for encoding UTF-8
-        and is expressed in the ABNF of [RFC2234].
-
-        UTF8-octets = *( UTF8-char )
-        UTF8-char   = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
-        UTF8-1      = %x00-7F
-        UTF8-2      = %xC2-DF UTF8-tail
-        UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
-                      %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
-        UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
-                      %xF4 %x80-8F 2( UTF8-tail )
-        UTF8-tail   = %x80-BF
-    */
-
-    SECTION("ill-formed first byte")
+    SECTION("RFC 3629")
     {
-        for (int byte1 = 0x80; byte1 <= 0xC1; ++byte1)
-        {
-            check_utf8string(false, byte1);
-        }
+        /*
+        RFC 3629 describes in Sect. 4 the syntax of UTF-8 byte sequences as
+        follows:
 
-        for (int byte1 = 0xF5; byte1 <= 0xFF; ++byte1)
-        {
-            check_utf8string(false, byte1);
-        }
-    }
+            A UTF-8 string is a sequence of octets representing a sequence of UCS
+            characters.  An octet sequence is valid UTF-8 only if it matches the
+            following syntax, which is derived from the rules for encoding UTF-8
+            and is expressed in the ABNF of [RFC2234].
 
-    SECTION("UTF8-1 (x00-x7F)")
-    {
-        SECTION("well-formed")
+            UTF8-octets = *( UTF8-char )
+            UTF8-char   = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
+            UTF8-1      = %x00-7F
+            UTF8-2      = %xC2-DF UTF8-tail
+            UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
+                          %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
+            UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
+                          %xF4 %x80-8F 2( UTF8-tail )
+            UTF8-tail   = %x80-BF
+        */
+
+        SECTION("ill-formed first byte")
         {
-            for (int byte1 = 0x00; byte1 <= 0x7F; ++byte1)
+            for (int byte1 = 0x80; byte1 <= 0xC1; ++byte1)
             {
-                // unescaped control characters are parse errors in JSON
-                if (0x00 <= byte1 and byte1 <= 0x1F)
+                check_utf8string(false, byte1);
+            }
+
+            for (int byte1 = 0xF5; byte1 <= 0xFF; ++byte1)
+            {
+                check_utf8string(false, byte1);
+            }
+        }
+
+        SECTION("UTF8-1 (x00-x7F)")
+        {
+            SECTION("well-formed")
+            {
+                for (int byte1 = 0x00; byte1 <= 0x7F; ++byte1)
+                {
+                    // unescaped control characters are parse errors in JSON
+                    if (0x00 <= byte1 and byte1 <= 0x1F)
+                    {
+                        check_utf8string(false, byte1);
+                        continue;
+                    }
+
+                    // an unescaped double quote (0x22) is a parse error in JSON
+                    if (byte1 == 0x22)
+                    {
+                        check_utf8string(false, byte1);
+                        continue;
+                    }
+
+                    // a single backslash is a parse error in JSON
+                    if (byte1 == 0x5C)
+                    {
+                        check_utf8string(false, byte1);
+                        continue;
+                    }
+
+                    // all other characters are OK
+                    check_utf8string(true, byte1);
+                }
+            }
+        }
+
+        SECTION("UTF8-2 (xC2-xDF UTF8-tail)")
+        {
+            SECTION("well-formed")
+            {
+                for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1)
+                {
+                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
+                    {
+                        check_utf8string(true, byte1, byte2);
+                    }
+                }
+            }
+
+            SECTION("ill-formed: missing second byte")
+            {
+                for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1)
                 {
                     check_utf8string(false, byte1);
-                    continue;
                 }
+            }
 
-                // a single quote is a parse error in JSON
-                if (byte1 == 0x22)
+            SECTION("ill-formed: wrong second byte")
+            {
+                for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1)
+                {
+                    for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
+                    {
+                        // skip correct second byte
+                        if (0x80 <= byte2 and byte2 <= 0xBF)
+                        {
+                            continue;
+                        }
+
+                        check_utf8string(false, byte1, byte2);
+                    }
+                }
+            }
+        }
+
+        SECTION("UTF8-3 (xE0 xA0-BF UTF8-tail)")
+        {
+            SECTION("well-formed")
+            {
+                for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1)
+                {
+                    for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2)
+                    {
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            check_utf8string(true, byte1, byte2, byte3);
+                        }
+                    }
+                }
+            }
+
+            SECTION("ill-formed: missing second byte")
+            {
+                for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1)
                 {
                     check_utf8string(false, byte1);
-                    continue;
                 }
+            }
 
-                // a single backslash is a parse error in JSON
-                if (byte1 == 0x5C)
+            SECTION("ill-formed: missing third byte")
+            {
+                for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1)
+                {
+                    for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2)
+                    {
+                        check_utf8string(false, byte1, byte2);
+                    }
+                }
+            }
+
+            SECTION("ill-formed: wrong second byte")
+            {
+                for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1)
+                {
+                    for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
+                    {
+                        // skip correct second byte
+                        if (0xA0 <= byte2 and byte2 <= 0xBF)
+                        {
+                            continue;
+                        }
+
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            check_utf8string(false, byte1, byte2, byte3);
+                        }
+                    }
+                }
+            }
+
+            SECTION("ill-formed: wrong third byte")
+            {
+                for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1)
+                {
+                    for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2)
+                    {
+                        for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
+                        {
+                            // skip correct third byte
+                            if (0x80 <= byte3 and byte3 <= 0xBF)
+                            {
+                                continue;
+                            }
+
+                            check_utf8string(false, byte1, byte2, byte3);
+                        }
+                    }
+                }
+            }
+        }
+
+        SECTION("UTF8-3 (xE1-xEC UTF8-tail UTF8-tail)")
+        {
+            SECTION("well-formed")
+            {
+                for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1)
+                {
+                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
+                    {
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            check_utf8string(true, byte1, byte2, byte3);
+                        }
+                    }
+                }
+            }
+
+            SECTION("ill-formed: missing second byte")
+            {
+                for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1)
                 {
                     check_utf8string(false, byte1);
-                    continue;
-                }
-
-                // all other characters are OK
-                check_utf8string(true, byte1);
-            }
-        }
-    }
-
-    SECTION("UTF8-2 (xC2-xDF UTF8-tail)")
-    {
-        SECTION("well-formed")
-        {
-            for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1)
-            {
-                for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
-                {
-                    check_utf8string(true, byte1, byte2);
                 }
             }
-        }
 
-        SECTION("ill-formed: missing second byte")
-        {
-            for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1)
+            SECTION("ill-formed: missing third byte")
             {
-                check_utf8string(false, byte1);
-            }
-        }
-
-        SECTION("ill-formed: wrong second byte")
-        {
-            for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1)
-            {
-                for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
+                for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1)
                 {
-                    // skip correct second byte
-                    if (0x80 <= byte2 and byte2 <= 0xBF)
+                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
                     {
-                        continue;
-                    }
-
-                    check_utf8string(false, byte1, byte2);
-                }
-            }
-        }
-    }
-
-    SECTION("UTF8-3 (xE0 xA0-BF UTF8-tail)")
-    {
-        SECTION("well-formed")
-        {
-            for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1)
-            {
-                for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2)
-                {
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
-                    {
-                        check_utf8string(true, byte1, byte2, byte3);
+                        check_utf8string(false, byte1, byte2);
                     }
                 }
             }
-        }
 
-        SECTION("ill-formed: missing second byte")
-        {
-            for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1)
+            SECTION("ill-formed: wrong second byte")
             {
-                check_utf8string(false, byte1);
-            }
-        }
-
-        SECTION("ill-formed: missing third byte")
-        {
-            for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1)
-            {
-                for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2)
+                for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1)
                 {
-                    check_utf8string(false, byte1, byte2);
-                }
-            }
-        }
-
-        SECTION("ill-formed: wrong second byte")
-        {
-            for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1)
-            {
-                for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
-                {
-                    // skip correct second byte
-                    if (0xA0 <= byte2 and byte2 <= 0xBF)
+                    for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
                     {
-                        continue;
-                    }
-
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
-                    {
-                        check_utf8string(false, byte1, byte2, byte3);
-                    }
-                }
-            }
-        }
-
-        SECTION("ill-formed: wrong third byte")
-        {
-            for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1)
-            {
-                for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2)
-                {
-                    for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
-                    {
-                        // skip correct third byte
-                        if (0x80 <= byte3 and byte3 <= 0xBF)
+                        // skip correct second byte
+                        if (0x80 <= byte2 and byte2 <= 0xBF)
                         {
                             continue;
                         }
 
-                        check_utf8string(false, byte1, byte2, byte3);
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            check_utf8string(false, byte1, byte2, byte3);
+                        }
                     }
                 }
             }
-        }
-    }
 
-    SECTION("UTF8-3 (xE1-xEC UTF8-tail UTF8-tail)")
-    {
-        SECTION("well-formed")
-        {
-            for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1)
+            SECTION("ill-formed: wrong third byte")
             {
-                for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
+                for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1)
                 {
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
                     {
-                        check_utf8string(true, byte1, byte2, byte3);
+                        for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
+                        {
+                            // skip correct third byte
+                            if (0x80 <= byte3 and byte3 <= 0xBF)
+                            {
+                                continue;
+                            }
+
+                            check_utf8string(false, byte1, byte2, byte3);
+                        }
                     }
                 }
             }
         }
 
-        SECTION("ill-formed: missing second byte")
+        SECTION("UTF8-3 (xED x80-9F UTF8-tail)")
         {
-            for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1)
+            SECTION("well-formed")
             {
-                check_utf8string(false, byte1);
-            }
-        }
-
-        SECTION("ill-formed: missing third byte")
-        {
-            for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1)
-            {
-                for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
+                for (int byte1 = 0xED; byte1 <= 0xED; ++byte1)
                 {
-                    check_utf8string(false, byte1, byte2);
-                }
-            }
-        }
-
-        SECTION("ill-formed: wrong second byte")
-        {
-            for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1)
-            {
-                for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
-                {
-                    // skip correct second byte
-                    if (0x80 <= byte2 and byte2 <= 0xBF)
+                    for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2)
                     {
-                        continue;
-                    }
-
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
-                    {
-                        check_utf8string(false, byte1, byte2, byte3);
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            check_utf8string(true, byte1, byte2, byte3);
+                        }
                     }
                 }
             }
-        }
 
-        SECTION("ill-formed: wrong third byte")
-        {
-            for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1)
+            SECTION("ill-formed: missing second byte")
             {
-                for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
+                for (int byte1 = 0xED; byte1 <= 0xED; ++byte1)
                 {
-                    for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
+                    check_utf8string(false, byte1);
+                }
+            }
+
+            SECTION("ill-formed: missing third byte")
+            {
+                for (int byte1 = 0xED; byte1 <= 0xED; ++byte1)
+                {
+                    for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2)
                     {
-                        // skip correct third byte
-                        if (0x80 <= byte3 and byte3 <= 0xBF)
+                        check_utf8string(false, byte1, byte2);
+                    }
+                }
+            }
+
+            SECTION("ill-formed: wrong second byte")
+            {
+                for (int byte1 = 0xED; byte1 <= 0xED; ++byte1)
+                {
+                    for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
+                    {
+                        // skip correct second byte
+                        if (0x80 <= byte2 and byte2 <= 0x9F)
                         {
                             continue;
                         }
 
-                        check_utf8string(false, byte1, byte2, byte3);
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            check_utf8string(false, byte1, byte2, byte3);
+                        }
                     }
                 }
             }
-        }
-    }
 
-    SECTION("UTF8-3 (xED x80-9F UTF8-tail)")
-    {
-        SECTION("well-formed")
-        {
-            for (int byte1 = 0xED; byte1 <= 0xED; ++byte1)
+            SECTION("ill-formed: wrong third byte")
             {
-                for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2)
+                for (int byte1 = 0xED; byte1 <= 0xED; ++byte1)
                 {
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                    for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2)
                     {
-                        check_utf8string(true, byte1, byte2, byte3);
+                        for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
+                        {
+                            // skip correct third byte
+                            if (0x80 <= byte3 and byte3 <= 0xBF)
+                            {
+                                continue;
+                            }
+
+                            check_utf8string(false, byte1, byte2, byte3);
+                        }
                     }
                 }
             }
         }
 
-        SECTION("ill-formed: missing second byte")
+        SECTION("UTF8-3 (xEE-xEF UTF8-tail UTF8-tail)")
         {
-            for (int byte1 = 0xED; byte1 <= 0xED; ++byte1)
+            SECTION("well-formed")
             {
-                check_utf8string(false, byte1);
-            }
-        }
-
-        SECTION("ill-formed: missing third byte")
-        {
-            for (int byte1 = 0xED; byte1 <= 0xED; ++byte1)
-            {
-                for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2)
+                for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1)
                 {
-                    check_utf8string(false, byte1, byte2);
-                }
-            }
-        }
-
-        SECTION("ill-formed: wrong second byte")
-        {
-            for (int byte1 = 0xED; byte1 <= 0xED; ++byte1)
-            {
-                for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
-                {
-                    // skip correct second byte
-                    if (0x80 <= byte2 and byte2 <= 0x9F)
+                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
                     {
-                        continue;
-                    }
-
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
-                    {
-                        check_utf8string(false, byte1, byte2, byte3);
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            check_utf8string(true, byte1, byte2, byte3);
+                        }
                     }
                 }
             }
-        }
 
-        SECTION("ill-formed: wrong third byte")
-        {
-            for (int byte1 = 0xED; byte1 <= 0xED; ++byte1)
+            SECTION("ill-formed: missing second byte")
             {
-                for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2)
+                for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1)
                 {
-                    for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
+                    check_utf8string(false, byte1);
+                }
+            }
+
+            SECTION("ill-formed: missing third byte")
+            {
+                for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1)
+                {
+                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
                     {
-                        // skip correct third byte
-                        if (0x80 <= byte3 and byte3 <= 0xBF)
+                        check_utf8string(false, byte1, byte2);
+                    }
+                }
+            }
+
+            SECTION("ill-formed: wrong second byte")
+            {
+                for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1)
+                {
+                    for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
+                    {
+                        // skip correct second byte
+                        if (0x80 <= byte2 and byte2 <= 0xBF)
                         {
                             continue;
                         }
 
-                        check_utf8string(false, byte1, byte2, byte3);
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            check_utf8string(false, byte1, byte2, byte3);
+                        }
                     }
                 }
             }
-        }
-    }
 
-    SECTION("UTF8-3 (xEE-xEF UTF8-tail UTF8-tail)")
-    {
-        SECTION("well-formed")
-        {
-            for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1)
+            SECTION("ill-formed: wrong third byte")
             {
-                for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
+                for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1)
                 {
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
                     {
-                        check_utf8string(true, byte1, byte2, byte3);
+                        for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
+                        {
+                            // skip correct third byte
+                            if (0x80 <= byte3 and byte3 <= 0xBF)
+                            {
+                                continue;
+                            }
+
+                            check_utf8string(false, byte1, byte2, byte3);
+                        }
                     }
                 }
             }
         }
 
-        SECTION("ill-formed: missing second byte")
+        SECTION("UTF8-4 (xF0 x90-BF UTF8-tail UTF8-tail)")
         {
-            for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1)
+            SECTION("well-formed")
             {
-                check_utf8string(false, byte1);
-            }
-        }
-
-        SECTION("ill-formed: missing third byte")
-        {
-            for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1)
-            {
-                for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
+                for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
                 {
-                    check_utf8string(false, byte1, byte2);
-                }
-            }
-        }
-
-        SECTION("ill-formed: wrong second byte")
-        {
-            for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1)
-            {
-                for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
-                {
-                    // skip correct second byte
-                    if (0x80 <= byte2 and byte2 <= 0xBF)
+                    for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
                     {
-                        continue;
-                    }
-
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
-                    {
-                        check_utf8string(false, byte1, byte2, byte3);
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                            {
+                                check_utf8string(true, byte1, byte2, byte3, byte4);
+                            }
+                        }
                     }
                 }
             }
-        }
 
-        SECTION("ill-formed: wrong third byte")
-        {
-            for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1)
+            SECTION("ill-formed: missing second byte")
             {
-                for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
+                for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
                 {
-                    for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
+                    check_utf8string(false, byte1);
+                }
+            }
+
+            SECTION("ill-formed: missing third byte")
+            {
+                for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
+                {
+                    for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
                     {
-                        // skip correct third byte
-                        if (0x80 <= byte3 and byte3 <= 0xBF)
+                        check_utf8string(false, byte1, byte2);
+                    }
+                }
+            }
+
+            SECTION("ill-formed: missing fourth byte")
+            {
+                for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
+                {
+                    for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
+                    {
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            check_utf8string(false, byte1, byte2, byte3);
+                        }
+                    }
+                }
+            }
+
+            SECTION("ill-formed: wrong second byte")
+            {
+                for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
+                {
+                    for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
+                    {
+                        // skip correct second byte
+                        if (0x90 <= byte2 and byte2 <= 0xBF)
                         {
                             continue;
                         }
 
-                        check_utf8string(false, byte1, byte2, byte3);
-                    }
-                }
-            }
-        }
-    }
-
-    SECTION("UTF8-4 (xF0 x90-BF UTF8-tail UTF8-tail)")
-    {
-        SECTION("well-formed")
-        {
-            for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
-            {
-                for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
-                {
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
-                    {
-                        for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                         {
-                            check_utf8string(true, byte1, byte2, byte3, byte4);
+                            for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                            {
+                                check_utf8string(false, byte1, byte2, byte3, byte4);
+                            }
                         }
                     }
                 }
             }
-        }
 
-        SECTION("ill-formed: missing second byte")
-        {
-            for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
+            SECTION("ill-formed: wrong third byte")
             {
-                check_utf8string(false, byte1);
-            }
-        }
-
-        SECTION("ill-formed: missing third byte")
-        {
-            for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
-            {
-                for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
+                for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
                 {
-                    check_utf8string(false, byte1, byte2);
-                }
-            }
-        }
-
-        SECTION("ill-formed: missing fourth byte")
-        {
-            for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
-            {
-                for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
-                {
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                    for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
                     {
-                        check_utf8string(false, byte1, byte2, byte3);
-                    }
-                }
-            }
-        }
-
-        SECTION("ill-formed: wrong second byte")
-        {
-            for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
-            {
-                for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
-                {
-                    // skip correct second byte
-                    if (0x90 <= byte2 and byte2 <= 0xBF)
-                    {
-                        continue;
-                    }
-
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
-                    {
-                        for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                        for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
                         {
-                            check_utf8string(false, byte1, byte2, byte3, byte4);
+                            // skip correct third byte
+                            if (0x80 <= byte3 and byte3 <= 0xBF)
+                            {
+                                continue;
+                            }
+
+                            for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                            {
+                                check_utf8string(false, byte1, byte2, byte3, byte4);
+                            }
                         }
                     }
                 }
             }
-        }
 
-        SECTION("ill-formed: wrong third byte")
-        {
-            for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
+            SECTION("ill-formed: wrong fourth byte")
             {
-                for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
+                for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
                 {
-                    for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
+                    for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
                     {
-                        // skip correct third byte
-                        if (0x80 <= byte3 and byte3 <= 0xBF)
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                         {
-                            continue;
-                        }
-
-                        for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
-                        {
-                            check_utf8string(false, byte1, byte2, byte3, byte4);
-                        }
-                    }
-                }
-            }
-        }
-
-        SECTION("ill-formed: wrong fourth byte")
-        {
-            for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
-            {
-                for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
-                {
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
-                    {
                             for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4)
                             {
                                 // skip fourth second byte
@@ -601,269 +603,263 @@ TEST_CASE("RFC 3629", "[hide]")
 
                                 check_utf8string(false, byte1, byte2, byte3, byte4);
                             }
-                    }
-                }
-            }
-        }
-    }
-
-    SECTION("UTF8-4 (xF1-F3 UTF8-tail UTF8-tail UTF8-tail)")
-    {
-        SECTION("well-formed")
-        {
-            for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
-            {
-                for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
-                {
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
-                    {
-                        for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
-                        {
-                            check_utf8string(true, byte1, byte2, byte3, byte4);
                         }
                     }
                 }
             }
         }
 
-        SECTION("ill-formed: missing second byte")
+        SECTION("UTF8-4 (xF1-F3 UTF8-tail UTF8-tail UTF8-tail)")
         {
-            for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
+            SECTION("well-formed")
             {
-                check_utf8string(false, byte1);
-            }
-        }
-
-        SECTION("ill-formed: missing third byte")
-        {
-            for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
-            {
-                for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
+                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
                 {
-                    check_utf8string(false, byte1, byte2);
-                }
-            }
-        }
-
-        SECTION("ill-formed: missing fourth byte")
-        {
-            for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
-            {
-                for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
-                {
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
                     {
-                        check_utf8string(false, byte1, byte2, byte3);
-                    }
-                }
-            }
-        }
-
-        SECTION("ill-formed: wrong second byte")
-        {
-            for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
-            {
-                for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
-                {
-                    // skip correct second byte
-                    if (0x80 <= byte2 and byte2 <= 0xBF)
-                    {
-                        continue;
-                    }
-
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
-                    {
-                        for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                         {
-                            check_utf8string(false, byte1, byte2, byte3, byte4);
+                            for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                            {
+                                check_utf8string(true, byte1, byte2, byte3, byte4);
+                            }
                         }
                     }
                 }
             }
-        }
 
-        SECTION("ill-formed: wrong third byte")
-        {
-            for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
+            SECTION("ill-formed: missing second byte")
             {
-                for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
+                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
                 {
-                    for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
+                    check_utf8string(false, byte1);
+                }
+            }
+
+            SECTION("ill-formed: missing third byte")
+            {
+                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
+                {
+                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
                     {
-                        // skip correct third byte
-                        if (0x80 <= byte3 and byte3 <= 0xBF)
+                        check_utf8string(false, byte1, byte2);
+                    }
+                }
+            }
+
+            SECTION("ill-formed: missing fourth byte")
+            {
+                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)  // lead byte xF1-F3
+                {
+                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)  // UTF8-tail range x80-BF
+                    {
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)  // UTF8-tail range x80-BF
+                        {
+                            check_utf8string(false, byte1, byte2, byte3);  // only three bytes passed; success_expected is false
+                        }
+                    }
+                }
+            }
+
+            SECTION("ill-formed: wrong second byte")
+            {
+                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
+                {
+                    for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
+                    {
+                        // skip correct second byte
+                        if (0x80 <= byte2 and byte2 <= 0xBF)
                         {
                             continue;
                         }
 
-                        for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                         {
-                            check_utf8string(false, byte1, byte2, byte3, byte4);
+                            for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                            {
+                                check_utf8string(false, byte1, byte2, byte3, byte4);
+                            }
                         }
                     }
                 }
             }
-        }
 
-        SECTION("ill-formed: wrong fourth byte")
-        {
-            for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
+            SECTION("ill-formed: wrong third byte")
             {
-                for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
+                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
                 {
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
                     {
-                        for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4)
+                        for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
                         {
-                            // skip correct fourth byte
+                            // skip correct third byte
                             if (0x80 <= byte3 and byte3 <= 0xBF)
                             {
                                 continue;
                             }
 
-                            check_utf8string(false, byte1, byte2, byte3, byte4);
+                            for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                            {
+                                check_utf8string(false, byte1, byte2, byte3, byte4);
+                            }
                         }
                     }
                 }
             }
-        }
-    }
 
-    SECTION("UTF8-4 (xF4 x80-8F UTF8-tail UTF8-tail)")
-    {
-        SECTION("well-formed")
-        {
-            for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
+            SECTION("ill-formed: wrong fourth byte")
             {
-                for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
+                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
                 {
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
                     {
-                        for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                         {
-                            check_utf8string(true, byte1, byte2, byte3, byte4);
+                            for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4)
+                            {
+                                // skip correct fourth byte (UTF8-tail, x80-BF) so only wrong values are checked
+                                if (0x80 <= byte4 and byte4 <= 0xBF)
+                                {
+                                    continue;
+                                }
+
+                                check_utf8string(false, byte1, byte2, byte3, byte4);  // success_expected is false
+                            }
                         }
                     }
                 }
             }
         }
 
-        SECTION("ill-formed: missing second byte")
+        SECTION("UTF8-4 (xF4 x80-8F UTF8-tail UTF8-tail)")
         {
-            for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
+            SECTION("well-formed")
             {
-                check_utf8string(false, byte1);
-            }
-        }
-
-        SECTION("ill-formed: missing third byte")
-        {
-            for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
-            {
-                for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
+                for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
                 {
-                    check_utf8string(false, byte1, byte2);
-                }
-            }
-        }
-
-        SECTION("ill-formed: missing fourth byte")
-        {
-            for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
-            {
-                for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
-                {
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                    for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
                     {
-                        check_utf8string(false, byte1, byte2, byte3);
-                    }
-                }
-            }
-        }
-
-        SECTION("ill-formed: wrong second byte")
-        {
-            for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
-            {
-                for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
-                {
-                    // skip correct second byte
-                    if (0x80 <= byte2 and byte2 <= 0x8F)
-                    {
-                        continue;
-                    }
-
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
-                    {
-                        for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                         {
-                            check_utf8string(false, byte1, byte2, byte3, byte4);
+                            for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                            {
+                                check_utf8string(true, byte1, byte2, byte3, byte4);
+                            }
                         }
                     }
                 }
             }
-        }
 
-        SECTION("ill-formed: wrong third byte")
-        {
-            for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
+            SECTION("ill-formed: missing second byte")
             {
-                for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
+                for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
                 {
-                    for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
+                    check_utf8string(false, byte1);
+                }
+            }
+
+            SECTION("ill-formed: missing third byte")
+            {
+                for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
+                {
+                    for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
                     {
-                        // skip correct third byte
-                        if (0x80 <= byte3 and byte3 <= 0xBF)
+                        check_utf8string(false, byte1, byte2);
+                    }
+                }
+            }
+
+            SECTION("ill-formed: missing fourth byte")
+            {
+                for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)  // single lead byte xF4
+                {
+                    for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)  // second byte range x80-8F
+                    {
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)  // UTF8-tail range x80-BF
+                        {
+                            check_utf8string(false, byte1, byte2, byte3);  // only three bytes passed; success_expected is false
+                        }
+                    }
+                }
+            }
+
+            SECTION("ill-formed: wrong second byte")
+            {
+                for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
+                {
+                    for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
+                    {
+                        // skip correct second byte
+                        if (0x80 <= byte2 and byte2 <= 0x8F)
                         {
                             continue;
                         }
 
-                        for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                         {
-                            check_utf8string(false, byte1, byte2, byte3, byte4);
+                            for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                            {
+                                check_utf8string(false, byte1, byte2, byte3, byte4);
+                            }
                         }
                     }
                 }
             }
-        }
 
-        SECTION("ill-formed: wrong fourth byte")
-        {
-            for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
+            SECTION("ill-formed: wrong third byte")
             {
-                for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
+                for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
                 {
-                    for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                    for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
                     {
-                        for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4)
+                        for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
                         {
-                            // skip correct fourth byte
+                            // skip correct third byte
                             if (0x80 <= byte3 and byte3 <= 0xBF)
                             {
                                 continue;
                             }
 
-                            check_utf8string(false, byte1, byte2, byte3, byte4);
+                            for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                            {
+                                check_utf8string(false, byte1, byte2, byte3, byte4);
+                            }
+                        }
+                    }
+                }
+            }
+
+            SECTION("ill-formed: wrong fourth byte")
+            {
+                for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
+                {
+                    for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
+                    {
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4)
+                            {
+                                // skip correct fourth byte (UTF8-tail, x80-BF) so only wrong values are checked
+                                if (0x80 <= byte4 and byte4 <= 0xBF)
+                                {
+                                    continue;
+                                }
+
+                                check_utf8string(false, byte1, byte2, byte3, byte4);  // success_expected is false
+                            }
                         }
                     }
                 }
             }
         }
     }
-}
 
-TEST_CASE("Unicode", "[hide]")
-{
-    /* NOTE: to_unicode is not used any more
-    SECTION("full enumeration of Unicode code points")
+    SECTION("\\uxxxx sequences")
     {
-        // lexer to call to_unicode on
-        json::lexer dummy_lexer("", 0);
-
         // create an escaped string from a code point
         const auto codepoint_to_unicode = [](std::size_t cp)
         {
-            // copd points are represented as a six-character sequence: a
+            // code points are represented as a six-character sequence: a
             // reverse solidus, followed by the lowercase letter u, followed
             // by four hexadecimal digits that encode the character's code
             // point
@@ -872,72 +868,101 @@ TEST_CASE("Unicode", "[hide]")
             return ss.str();
         };
 
-        // generate all UTF-8 code points; in total, 1112064 code points are
-        // generated: 0x1FFFFF code points - 2048 invalid values between
-        // 0xD800 and 0xDFFF.
-        for (std::size_t cp = 0; cp <= 0x10FFFFu; ++cp)
+        SECTION("correct sequences")
         {
-            // The Unicode standard permanently reserves these code point
-            // values for UTF-16 encoding of the high and low surrogates, and
-            // they will never be assigned a character, so there should be no
-            // reason to encode them. The official Unicode standard says that
-            // no UTF forms, including UTF-16, can encode these code points.
-            if (cp >= 0xD800u and cp <= 0xDFFFu)
+            // generate all Unicode code points; in total, 1112064 code points are
+            // generated: 0x110000 code points - 2048 invalid values between
+            // 0xD800 and 0xDFFF.
+            for (std::size_t cp = 0; cp <= 0x10FFFFu; ++cp)
             {
-                // if we would not skip these code points, we would get a
-                // "missing low surrogate" exception
-                continue;
-            }
+                // string to store the code point as in \uxxxx format
+                std::string json_text = "\"";
 
-            // string to store the code point as in \uxxxx format
-            std::string escaped_string;
-            // string to store the code point as unescaped character sequence
-            std::string unescaped_string;
-
-            if (cp < 0x10000u)
-            {
-                // code points in the Basic Multilingual Plane can be
-                // represented with one \\uxxxx sequence
-                escaped_string = codepoint_to_unicode(cp);
-
-                // All Unicode characters may be placed within the quotation
-                // marks, except for the characters that must be escaped:
-                // quotation mark, reverse solidus, and the control characters
-                // (U+0000 through U+001F); we ignore these code points as
-                // they are checked with codepoint_to_unicode.
-                if (cp > 0x1f and cp != 0x22 and cp != 0x5c)
+                // decide whether to use one or two \uxxxx sequences
+                if (cp < 0x10000u)
                 {
-                    unescaped_string = dummy_lexer.to_unicode(cp);
+                    // The Unicode standard permanently reserves these code point
+                    // values for UTF-16 encoding of the high and low surrogates, and
+                    // they will never be assigned a character, so there should be no
+                    // reason to encode them. The official Unicode standard says that
+                    // no UTF forms, including UTF-16, can encode these code points.
+                    if (cp >= 0xD800u and cp <= 0xDFFFu)
+                    {
+                        // if we would not skip these code points, we would get a
+                        // "missing low surrogate" exception
+                        continue;
+                    }
+
+                    // code points in the Basic Multilingual Plane can be
+                    // represented with one \uxxxx sequence
+                    json_text += codepoint_to_unicode(cp);
+                }
+                else
+                {
+                    // To escape an extended character that is not in the Basic
+                    // Multilingual Plane, the character is represented as a
+                    // 12-character sequence, encoding the UTF-16 surrogate pair
+                    const auto codepoint1 = 0xd800u + (((cp - 0x10000u) >> 10) & 0x3ffu);
+                    const auto codepoint2 = 0xdc00u + ((cp - 0x10000u) & 0x3ffu);
+                    json_text += codepoint_to_unicode(codepoint1) + codepoint_to_unicode(codepoint2);
+                }
+
+                json_text += "\"";
+                CAPTURE(json_text);
+                CHECK_NOTHROW(json::parse(json_text));
+            }
+        }
+
+        SECTION("incorrect sequences")
+        {
+            SECTION("high surrogate without low surrogate")
+            {
+                // D800..DBFF are high surrogates and must be followed by low
+                // surrogates DC00..DFFF; here, nothing follows
+                for (std::size_t cp = 0xD800u; cp <= 0xDBFFu; ++cp)
+                {
+                    std::string json_text = "\"" + codepoint_to_unicode(cp) + "\"";  // quoted string holding one escape
+                    CAPTURE(json_text);
+                    CHECK_THROWS_AS(json::parse(json_text), json::parse_error);  // parsing must throw parse_error
                 }
             }
-            else
+
+#if 0
+            SECTION("high surrogate with wrong low surrogate")
             {
-                // To escape an extended character that is not in the Basic
-                // Multilingual Plane, the character is represented as a
-                // 12-character sequence, encoding the UTF-16 surrogate pair
-                const auto codepoint1 = 0xd800u + (((cp - 0x10000u) >> 10) & 0x3ffu);
-                const auto codepoint2 = 0xdc00u + ((cp - 0x10000u) & 0x3ffu);
-                escaped_string = codepoint_to_unicode(codepoint1);
-                escaped_string += codepoint_to_unicode(codepoint2);
-                unescaped_string += dummy_lexer.to_unicode(codepoint1, codepoint2);
+                // D800..DBFF are high surrogates and must be followed by low
+                // surrogates DC00..DFFF; here a different sequence follows
+                for (std::size_t cp1 = 0xD800u; cp1 <= 0xDBFFu; ++cp1)  // every high surrogate
+                {
+                    for (std::size_t cp2 = 0x0000u; cp2 <= 0xFFFFu; ++cp2)  // every 16-bit follower value
+                    {
+                        if (0xDC00u <= cp2 and cp2 <= 0xDFFFu)
+                        {
+                            continue;  // DC00..DFFF is a low surrogate, i.e. not a wrong follower
+                        }
+
+                        std::string json_text = "\"" + codepoint_to_unicode(cp1) + codepoint_to_unicode(cp2) + "\"";
+                        CAPTURE(json_text);
+                        CHECK_THROWS_AS(json::parse(json_text), json::parse_error);  // parsing must throw parse_error
+                    }
+                }
+            }
+#endif
+
+            SECTION("low surrogate without high surrogate")
+            {
+                // low surrogates DC00..DFFF must follow high surrogates; here,
+                // they occur alone
+                for (std::size_t cp = 0xDC00u; cp <= 0xDFFFu; ++cp)
+                {
+                    std::string json_text = "\"" + codepoint_to_unicode(cp) + "\"";  // quoted string holding one escape
+                    CAPTURE(json_text);
+                    CHECK_THROWS_AS(json::parse(json_text), json::parse_error);  // parsing must throw parse_error
+                }
             }
 
-            // all other code points are valid and must not yield parse errors
-            CAPTURE(cp);
-            CAPTURE(escaped_string);
-            CAPTURE(unescaped_string);
-
-            json j1, j2, j3, j4;
-            CHECK_NOTHROW(j1 = json::parse("\"" + escaped_string + "\""));
-            CHECK_NOTHROW(j2 = json::parse(j1.dump()));
-            CHECK(j1 == j2);
-
-            CHECK_NOTHROW(j3 = json::parse("\"" + unescaped_string + "\""));
-            CHECK_NOTHROW(j4 = json::parse(j3.dump()));
-            CHECK(j3 == j4);
         }
     }
-     */
 
     SECTION("read all unicode characters")
     {