fixed lexer issue which required null byte at the end of contiguous storage containers #290

Author: Niels
Date:   2016-08-23 22:38:05 +02:00
Parent: a79d634ccb
Commit: 1d66ab9f7a

4 changed files with 82 additions and 44 deletions


@@ -8538,6 +8538,13 @@ basic_json_parser_63:
     incremented without leaving the limits of the line buffer. Note re2c
     decides when to call this function.
+
+    If the lexer reads from contiguous storage, there is no trailing null
+    byte. Therefore, this function must make sure to add these padding
+    null bytes.
+
+    If the lexer reads from an input stream, this function reads the next
+    line of the input.
 
     @pre
         p p p p p p u u u u u x . . . . . .
         ^ ^ ^ ^
@@ -8553,26 +8560,38 @@ basic_json_parser_63:
     */
     void fill_line_buffer()
     {
-        // no stream is used or end of file is reached
-        if (m_stream == nullptr or not * m_stream)
-        {
-            return;
-        }
-
         // number of processed characters (p)
         const auto offset_start = m_start - m_content;
         // offset for m_marker wrt. to m_start
-        const auto offset_marker = m_marker - m_start;
+        const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start;
         // number of unprocessed characters (u)
         const auto offset_cursor = m_cursor - m_start;
 
-        // delete processed characters from line buffer
-        m_line_buffer.erase(0, static_cast<size_t>(offset_start));
-        // read next line from input stream
-        std::string line;
-        std::getline(*m_stream, line);
-
-        // add line with newline symbol to the line buffer
-        m_line_buffer += "\n" + line;
+        // no stream is used or end of file is reached
+        if (m_stream == nullptr or not * m_stream)
+        {
+            // copy unprocessed characters to line buffer
+            m_line_buffer.clear();
+            for (m_cursor = m_start; m_cursor != m_limit; ++m_cursor)
+            {
+                m_line_buffer.append(1, static_cast<const char>(*m_cursor));
+            }
+
+            // append 5 characters (size of longest keyword "false") to
+            // make sure that there is sufficient space between m_cursor
+            // and m_limit
+            m_line_buffer.append(5, '\0');
+        }
+        else
+        {
+            // delete processed characters from line buffer
+            m_line_buffer.erase(0, static_cast<size_t>(offset_start));
+            // read next line from input stream
+            std::string line;
+            std::getline(*m_stream, line);
+
+            // add line with newline symbol to the line buffer
+            m_line_buffer += "\n" + line;
+        }
 
         // set pointers
         m_content = reinterpret_cast<const lexer_char_t*>(m_line_buffer.c_str());
@@ -8580,7 +8599,7 @@ basic_json_parser_63:
         m_start  = m_content;
         m_marker = m_start + offset_marker;
         m_cursor = m_start + offset_cursor;
-        m_limit  = m_start + m_line_buffer.size() - 1;
+        m_limit  = m_start + m_line_buffer.size();
     }
 
     /// return string representation of last read token
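
The new contiguous-storage branch copies the unprocessed characters into the lexer's own line buffer and appends five null bytes ("false" being the longest keyword the re2c scanner may look ahead for), so callers no longer have to null-terminate their containers. A minimal standalone sketch of that padding idea; make_padded_buffer is a hypothetical helper used only for illustration, not part of the library:

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <vector>

    // Copy the not-yet-processed bytes [start, limit) into an owned buffer
    // and pad with five '\0' bytes, so the scanner can look ahead past the
    // last real character without leaving the buffer.
    std::string make_padded_buffer(const std::uint8_t* start, const std::uint8_t* limit)
    {
        std::string buffer;
        for (auto p = start; p != limit; ++p)
        {
            buffer.append(1, static_cast<char>(*p));
        }
        buffer.append(5, '\0');  // padding instead of a caller-supplied '\0'
        return buffer;
    }

    int main()
    {
        // container without a trailing null byte, as in the updated tests
        std::vector<std::uint8_t> v = {'t', 'r', 'u', 'e'};
        const std::string buf = make_padded_buffer(v.data(), v.data() + v.size());
        std::cout << buf.size() << '\n';  // 9: 4 payload bytes + 5 padding bytes
    }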


@@ -7835,6 +7835,13 @@ class basic_json
     incremented without leaving the limits of the line buffer. Note re2c
     decides when to call this function.
+
+    If the lexer reads from contiguous storage, there is no trailing null
+    byte. Therefore, this function must make sure to add these padding
+    null bytes.
+
+    If the lexer reads from an input stream, this function reads the next
+    line of the input.
 
     @pre
         p p p p p p u u u u u x . . . . . .
         ^ ^ ^ ^
@@ -7850,26 +7857,38 @@ class basic_json
     */
     void fill_line_buffer()
    {
-        // no stream is used or end of file is reached
-        if (m_stream == nullptr or not * m_stream)
-        {
-            return;
-        }
-
         // number of processed characters (p)
         const auto offset_start = m_start - m_content;
         // offset for m_marker wrt. to m_start
-        const auto offset_marker = m_marker - m_start;
+        const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start;
         // number of unprocessed characters (u)
         const auto offset_cursor = m_cursor - m_start;
 
-        // delete processed characters from line buffer
-        m_line_buffer.erase(0, static_cast<size_t>(offset_start));
-        // read next line from input stream
-        std::string line;
-        std::getline(*m_stream, line);
-
-        // add line with newline symbol to the line buffer
-        m_line_buffer += "\n" + line;
+        // no stream is used or end of file is reached
+        if (m_stream == nullptr or not * m_stream)
+        {
+            // copy unprocessed characters to line buffer
+            m_line_buffer.clear();
+            for (m_cursor = m_start; m_cursor != m_limit; ++m_cursor)
+            {
+                m_line_buffer.append(1, static_cast<const char>(*m_cursor));
+            }
+
+            // append 5 characters (size of longest keyword "false") to
+            // make sure that there is sufficient space between m_cursor
+            // and m_limit
+            m_line_buffer.append(5, '\0');
+        }
+        else
+        {
+            // delete processed characters from line buffer
+            m_line_buffer.erase(0, static_cast<size_t>(offset_start));
+            // read next line from input stream
+            std::string line;
+            std::getline(*m_stream, line);
+
+            // add line with newline symbol to the line buffer
+            m_line_buffer += "\n" + line;
+        }
 
         // set pointers
         m_content = reinterpret_cast<const lexer_char_t*>(m_line_buffer.c_str());
@@ -7877,7 +7896,7 @@ class basic_json
         m_start  = m_content;
         m_marker = m_start + offset_marker;
         m_cursor = m_start + offset_cursor;
-        m_limit  = m_start + m_line_buffer.size() - 1;
+        m_limit  = m_start + m_line_buffer.size();
     }
 
     /// return string representation of last read token


@@ -761,19 +761,19 @@ TEST_CASE("parser class")
     {
         SECTION("from std::vector")
         {
-            std::vector<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::vector<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
         }
 
         SECTION("from std::array")
         {
-            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e', '\0'} };
+            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e'} };
             CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
         }
 
         SECTION("from array")
         {
-            uint8_t v[] = {'t', 'r', 'u', 'e', '\0'};
+            uint8_t v[] = {'t', 'r', 'u', 'e'};
             CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
         }
@@ -790,13 +790,13 @@ TEST_CASE("parser class")
         SECTION("from std::initializer_list")
         {
-            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
         }
 
         SECTION("from std::valarray")
         {
-            std::valarray<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::valarray<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
         }
     }
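
These parser tests now feed containers that hold only the JSON payload, with no terminating null byte. A small usage sketch of the same iterator-range interface exercised above, assuming the single-header json.hpp is on the include path (the array payload is illustrative):

    #include <cstdint>
    #include <iostream>
    #include <vector>
    #include "json.hpp"

    using json = nlohmann::json;

    int main()
    {
        // JSON payload without a trailing '\0', as in the updated test cases
        std::vector<std::uint8_t> v = {'[', '1', ',', '2', ',', '3', ']'};

        // same iterator-range call the tests above exercise
        json j = json::parser(std::begin(v), std::end(v)).parse();
        std::cout << j.size() << '\n';  // 3
    }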


@@ -86,19 +86,19 @@ TEST_CASE("deserialization")
     {
         SECTION("from std::vector")
         {
-            std::vector<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::vector<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parse(v) == json(true));
         }
 
         SECTION("from std::array")
         {
-            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e', '\0'} };
+            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e'} };
             CHECK(json::parse(v) == json(true));
         }
 
         SECTION("from array")
         {
-            uint8_t v[] = {'t', 'r', 'u', 'e', '\0'};
+            uint8_t v[] = {'t', 'r', 'u', 'e'};
             CHECK(json::parse(v) == json(true));
         }
@@ -110,7 +110,7 @@ TEST_CASE("deserialization")
         SECTION("from std::initializer_list")
         {
-            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parse(v) == json(true));
         }
@@ -125,19 +125,19 @@ TEST_CASE("deserialization")
     {
         SECTION("from std::vector")
         {
-            std::vector<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::vector<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
         }
 
        SECTION("from std::array")
        {
-            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e', '\0'} };
+            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e'} };
             CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
         }
 
         SECTION("from array")
         {
-            uint8_t v[] = {'t', 'r', 'u', 'e', '\0'};
+            uint8_t v[] = {'t', 'r', 'u', 'e'};
             CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
         }
@@ -149,13 +149,13 @@ TEST_CASE("deserialization")
         SECTION("from std::initializer_list")
         {
-            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
         }
 
         SECTION("from std::valarray")
         {
-            std::valarray<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::valarray<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
         }
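
The deserialization tests exercise both the whole-container overload json::parse(v) and the iterator-range overload without a sentinel '\0'. A brief usage sketch of the container overload on raw bytes, assuming the single-header json.hpp is available (the buffer contents are illustrative):

    #include <cstdint>
    #include <iostream>
    #include <vector>
    #include "json.hpp"

    using json = nlohmann::json;

    int main()
    {
        // raw JSON bytes as they might arrive from a file or socket read;
        // note there is no terminating '\0' at the end any more
        std::vector<std::uint8_t> buf = {'{', '"', 'o', 'k', '"', ':', 't', 'r', 'u', 'e', '}'};

        // whole-container overload, as exercised by the tests above
        json j = json::parse(buf);
        std::cout << std::boolalpha << j["ok"].get<bool>() << '\n';  // true
    }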