From 3a5cf9bd0a58aed6f26ed0802f938d0ed348f75e Mon Sep 17 00:00:00 2001
From: Niels Lohmann <mail@nlohmann.me>
Date: Sat, 1 Apr 2017 08:34:58 +0200
Subject: [PATCH] :hammer: improved code coverage

---
 src/json.hpp                  | 10 ++++------
 test/src/unit-class_lexer.cpp |  9 +++++++++
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/json.hpp b/src/json.hpp
index 4198b140..1c00e12a 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -10965,6 +10965,9 @@ class basic_json
                                     codepoint = codepoint1;
                                 }
 
+                                // the calculation above must yield a code point in the range U+0000..U+10FFFF
+                                assert(0x00 <= codepoint and codepoint <= 0x10FFFF);
+
                                 // translate code point to bytes
                                 if (codepoint < 0x80)
                                 {
@@ -10984,7 +10987,7 @@ class basic_json
                                     add(0x80 | ((codepoint >> 6) & 0x3F));
                                     add(0x80 | (codepoint & 0x3F));
                                 }
-                                else if (codepoint <= 0x10ffff)
+                                else
                                 {
                                     // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                                     add(0xF0 | (codepoint >> 18));
@@ -10992,11 +10995,6 @@ class basic_json
                                     add(0x80 | ((codepoint >> 6) & 0x3F));
                                     add(0x80 | (codepoint & 0x3F));
                                 }
-                                else
-                                {
-                                    error_message = "invalid string: code points above U+10FFFF are invalid";
-                                    return token_type::parse_error;
-                                }
 
                                 break;
                             }
diff --git a/test/src/unit-class_lexer.cpp b/test/src/unit-class_lexer.cpp
index 468e1f52..2acea176 100644
--- a/test/src/unit-class_lexer.cpp
+++ b/test/src/unit-class_lexer.cpp
@@ -158,6 +158,15 @@ TEST_CASE("lexer class")
         }
     }
 
+    SECTION("very large string")
+    {
+        // strings larger than 1024 bytes yield a resize of the lexer's yytext buffer
+        std::string s("\"");
+        s += std::string(2048, 'x');
+        s += "\"";
+        CHECK((json::lexer(s.c_str(), s.size()).scan() == json::lexer::token_type::value_string));
+    }
+
     /* NOTE: to_unicode function has been removed
     SECTION("to_unicode")
     {