From 1d66ab9f7a5898267ebecefe5c2ba4ee0ffec613 Mon Sep 17 00:00:00 2001
From: Niels <niels.lohmann@gmail.com>
Date: Tue, 23 Aug 2016 22:38:05 +0200
Subject: [PATCH] fixed lexer issue which required null byte at the end of
 contiguous storage containers #290

---
 src/json.hpp                      | 49 +++++++++++++++++++++----------
 src/json.hpp.re2c                 | 49 +++++++++++++++++++++----------
 test/src/unit-class_parser.cpp    | 10 +++----
 test/src/unit-deserialization.cpp | 18 ++++++------
 4 files changed, 82 insertions(+), 44 deletions(-)

diff --git a/src/json.hpp b/src/json.hpp
index ac5c78b3..0e25f746 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -8538,6 +8538,13 @@ basic_json_parser_63:
         incremented without leaving the limits of the line buffer. Note re2c
         decides when to call this function.
 
+        If the lexer reads from contiguous storage, there is no trailing null
+        byte. Therefore, this function must make sure to add these padding
+        null bytes.
+
+        If the lexer reads from an input stream, this function reads the next
+        line of the input.
+
         @pre
             p p p p p p u u u u u x . . . . . .
             ^           ^       ^   ^
@@ -8553,26 +8560,38 @@ basic_json_parser_63:
         */
         void fill_line_buffer()
         {
-            // no stream is used or end of file is reached
-            if (m_stream == nullptr or not * m_stream)
-            {
-                return;
-            }
-
             // number of processed characters (p)
             const auto offset_start = m_start - m_content;
             // offset for m_marker wrt. to m_start
-            const auto offset_marker = m_marker - m_start;
+            const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start;
             // number of unprocessed characters (u)
             const auto offset_cursor = m_cursor - m_start;
 
-            // delete processed characters from line buffer
-            m_line_buffer.erase(0, static_cast<size_t>(offset_start));
-            // read next line from input stream
-            std::string line;
-            std::getline(*m_stream, line);
-            // add line with newline symbol to the line buffer
-            m_line_buffer += "\n" + line;
+            // no stream is used or end of file is reached
+            if (m_stream == nullptr or not * m_stream)
+            {
+                // Copy the unprocessed characters to the line buffer. On a
+                // repeated call m_start may already point into
+                // m_line_buffer, so clear() followed by append() could read
+                // characters that were just overwritten. assign() builds the
+                // new contents before replacing the old ones.
+                m_line_buffer.assign(m_start, m_limit);
+
+                // append 5 characters (size of longest keyword "false") to
+                // make sure that there is sufficient space between m_cursor
+                // and m_limit
+                m_line_buffer.append(5, '\0');
+            }
+            else
+            {
+                // delete processed characters from line buffer
+                m_line_buffer.erase(0, static_cast<size_t>(offset_start));
+                // read next line from input stream
+                std::string line;
+                std::getline(*m_stream, line);
+                // add line with newline symbol to the line buffer
+                m_line_buffer += "\n" + line;
+            }
 
             // set pointers
             m_content = reinterpret_cast<const lexer_char_t*>(m_line_buffer.c_str());
@@ -8580,7 +8599,7 @@ basic_json_parser_63:
             m_start  = m_content;
             m_marker = m_start + offset_marker;
             m_cursor = m_start + offset_cursor;
-            m_limit  = m_start + m_line_buffer.size() - 1;
+            m_limit  = m_start + m_line_buffer.size();
         }
 
         /// return string representation of last read token
diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c
index dffb11af..089517bb 100644
--- a/src/json.hpp.re2c
+++ b/src/json.hpp.re2c
@@ -7835,6 +7835,13 @@ class basic_json
         incremented without leaving the limits of the line buffer. Note re2c
         decides when to call this function.
 
+        If the lexer reads from contiguous storage, there is no trailing null
+        byte. Therefore, this function must make sure to add these padding
+        null bytes.
+
+        If the lexer reads from an input stream, this function reads the next
+        line of the input.
+
         @pre
             p p p p p p u u u u u x . . . . . .
             ^           ^       ^   ^
@@ -7850,26 +7857,38 @@ class basic_json
         */
         void fill_line_buffer()
         {
-            // no stream is used or end of file is reached
-            if (m_stream == nullptr or not * m_stream)
-            {
-                return;
-            }
-
             // number of processed characters (p)
             const auto offset_start = m_start - m_content;
             // offset for m_marker wrt. to m_start
-            const auto offset_marker = m_marker - m_start;
+            const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start;
             // number of unprocessed characters (u)
             const auto offset_cursor = m_cursor - m_start;
 
-            // delete processed characters from line buffer
-            m_line_buffer.erase(0, static_cast<size_t>(offset_start));
-            // read next line from input stream
-            std::string line;
-            std::getline(*m_stream, line);
-            // add line with newline symbol to the line buffer
-            m_line_buffer += "\n" + line;
+            // no stream is used or end of file is reached
+            if (m_stream == nullptr or not * m_stream)
+            {
+                // Copy the unprocessed characters to the line buffer. On a
+                // repeated call m_start may already point into
+                // m_line_buffer, so clear() followed by append() could read
+                // characters that were just overwritten. assign() builds the
+                // new contents before replacing the old ones.
+                m_line_buffer.assign(m_start, m_limit);
+
+                // append 5 characters (size of longest keyword "false") to
+                // make sure that there is sufficient space between m_cursor
+                // and m_limit
+                m_line_buffer.append(5, '\0');
+            }
+            else
+            {
+                // delete processed characters from line buffer
+                m_line_buffer.erase(0, static_cast<size_t>(offset_start));
+                // read next line from input stream
+                std::string line;
+                std::getline(*m_stream, line);
+                // add line with newline symbol to the line buffer
+                m_line_buffer += "\n" + line;
+            }
 
             // set pointers
             m_content = reinterpret_cast<const lexer_char_t*>(m_line_buffer.c_str());
@@ -7877,7 +7896,7 @@ class basic_json
             m_start  = m_content;
             m_marker = m_start + offset_marker;
             m_cursor = m_start + offset_cursor;
-            m_limit  = m_start + m_line_buffer.size() - 1;
+            m_limit  = m_start + m_line_buffer.size();
         }
 
         /// return string representation of last read token
diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp
index 32a6ac8f..6fcf947d 100644
--- a/test/src/unit-class_parser.cpp
+++ b/test/src/unit-class_parser.cpp
@@ -761,19 +761,19 @@ TEST_CASE("parser class")
     {
         SECTION("from std::vector")
         {
-            std::vector<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::vector<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
         }
 
         SECTION("from std::array")
         {
-            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e', '\0'} };
+            std::array<uint8_t, 4> v { {'t', 'r', 'u', 'e'} }; // size 4: no zero-filled fifth element
             CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
         }
 
         SECTION("from array")
         {
-            uint8_t v[] = {'t', 'r', 'u', 'e', '\0'};
+            uint8_t v[] = {'t', 'r', 'u', 'e'};
             CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
         }
 
@@ -790,13 +790,13 @@ TEST_CASE("parser class")
 
         SECTION("from std::initializer_list")
         {
-            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
         }
 
         SECTION("from std::valarray")
         {
-            std::valarray<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::valarray<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
         }
     }
diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp
index 6e2c7813..dcd7c272 100644
--- a/test/src/unit-deserialization.cpp
+++ b/test/src/unit-deserialization.cpp
@@ -86,19 +86,19 @@ TEST_CASE("deserialization")
         {
             SECTION("from std::vector")
             {
-                std::vector<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+                std::vector<uint8_t> v = {'t', 'r', 'u', 'e'};
                 CHECK(json::parse(v) == json(true));
             }
 
             SECTION("from std::array")
             {
-                std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e', '\0'} };
+                std::array<uint8_t, 4> v { {'t', 'r', 'u', 'e'} }; // size 4: no zero-filled fifth element
                 CHECK(json::parse(v) == json(true));
             }
 
             SECTION("from array")
             {
-                uint8_t v[] = {'t', 'r', 'u', 'e', '\0'};
+                uint8_t v[] = {'t', 'r', 'u', 'e'};
                 CHECK(json::parse(v) == json(true));
             }
 
@@ -110,7 +110,7 @@ TEST_CASE("deserialization")
 
             SECTION("from std::initializer_list")
             {
-                std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+                std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e'};
                 CHECK(json::parse(v) == json(true));
             }
 
@@ -125,19 +125,19 @@ TEST_CASE("deserialization")
         {
             SECTION("from std::vector")
             {
-                std::vector<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+                std::vector<uint8_t> v = {'t', 'r', 'u', 'e'};
                 CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
             }
 
             SECTION("from std::array")
             {
-                std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e', '\0'} };
+                std::array<uint8_t, 4> v { {'t', 'r', 'u', 'e'} }; // size 4: no zero-filled fifth element
                 CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
             }
 
             SECTION("from array")
             {
-                uint8_t v[] = {'t', 'r', 'u', 'e', '\0'};
+                uint8_t v[] = {'t', 'r', 'u', 'e'};
                 CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
             }
 
@@ -149,13 +149,13 @@ TEST_CASE("deserialization")
 
             SECTION("from std::initializer_list")
             {
-                std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+                std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e'};
                 CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
             }
 
             SECTION("from std::valarray")
             {
-                std::valarray<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+                std::valarray<uint8_t> v = {'t', 'r', 'u', 'e'};
                 CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
             }