From 1d66ab9f7a5898267ebecefe5c2ba4ee0ffec613 Mon Sep 17 00:00:00 2001 From: Niels Date: Tue, 23 Aug 2016 22:38:05 +0200 Subject: [PATCH] fixed lexer issue which required null byte at the end of contiguous storage containers #290 --- src/json.hpp | 49 +++++++++++++++++++++---------- src/json.hpp.re2c | 49 +++++++++++++++++++++---------- test/src/unit-class_parser.cpp | 10 +++---- test/src/unit-deserialization.cpp | 18 ++++++------ 4 files changed, 82 insertions(+), 44 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index ac5c78b3..0e25f746 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -8538,6 +8538,13 @@ basic_json_parser_63: incremented without leaving the limits of the line buffer. Note re2c decides when to call this function. + If the lexer reads from contiguous storage, there is no trailing null + byte. Therefore, this function must make sure to add these padding + null bytes. + + If the lexer reads from an input stream, this function reads the next + line of the input. + @pre p p p p p p u u u u u x . . . . . . ^ ^ ^ ^ @@ -8553,26 +8560,38 @@ basic_json_parser_63: */ void fill_line_buffer() { - // no stream is used or end of file is reached - if (m_stream == nullptr or not * m_stream) - { - return; - } - // number of processed characters (p) const auto offset_start = m_start - m_content; // offset for m_marker wrt. to m_start - const auto offset_marker = m_marker - m_start; + const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start; // number of unprocessed characters (u) const auto offset_cursor = m_cursor - m_start; - // delete processed characters from line buffer - m_line_buffer.erase(0, static_cast(offset_start)); - // read next line from input stream - std::string line; - std::getline(*m_stream, line); - // add line with newline symbol to the line buffer - m_line_buffer += "\n" + line; + // no stream is used or end of file is reached + if (m_stream == nullptr or not * m_stream) + { + // copy unprocessed characters to line buffer + m_line_buffer.clear(); + for (m_cursor = m_start; m_cursor != m_limit; ++m_cursor) + { + m_line_buffer.append(1, static_cast(*m_cursor)); + } + + // append 5 characters (size of longest keyword "false") to + // make sure that there is sufficient space between m_cursor + // and m_limit + m_line_buffer.append(5, '\0'); + } + else + { + // delete processed characters from line buffer + m_line_buffer.erase(0, static_cast(offset_start)); + // read next line from input stream + std::string line; + std::getline(*m_stream, line); + // add line with newline symbol to the line buffer + m_line_buffer += "\n" + line; + } // set pointers m_content = reinterpret_cast(m_line_buffer.c_str()); @@ -8580,7 +8599,7 @@ basic_json_parser_63: m_start = m_content; m_marker = m_start + offset_marker; m_cursor = m_start + offset_cursor; - m_limit = m_start + m_line_buffer.size() - 1; + m_limit = m_start + m_line_buffer.size(); } /// return string representation of last read token diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index dffb11af..089517bb 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -7835,6 +7835,13 @@ class basic_json incremented without leaving the limits of the line buffer. Note re2c decides when to call this function. + If the lexer reads from contiguous storage, there is no trailing null + byte. Therefore, this function must make sure to add these padding + null bytes. + + If the lexer reads from an input stream, this function reads the next + line of the input. + @pre p p p p p p u u u u u x . . . . . . ^ ^ ^ ^ @@ -7850,26 +7857,38 @@ class basic_json */ void fill_line_buffer() { - // no stream is used or end of file is reached - if (m_stream == nullptr or not * m_stream) - { - return; - } - // number of processed characters (p) const auto offset_start = m_start - m_content; // offset for m_marker wrt. to m_start - const auto offset_marker = m_marker - m_start; + const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start; // number of unprocessed characters (u) const auto offset_cursor = m_cursor - m_start; - // delete processed characters from line buffer - m_line_buffer.erase(0, static_cast(offset_start)); - // read next line from input stream - std::string line; - std::getline(*m_stream, line); - // add line with newline symbol to the line buffer - m_line_buffer += "\n" + line; + // no stream is used or end of file is reached + if (m_stream == nullptr or not * m_stream) + { + // copy unprocessed characters to line buffer + m_line_buffer.clear(); + for (m_cursor = m_start; m_cursor != m_limit; ++m_cursor) + { + m_line_buffer.append(1, static_cast(*m_cursor)); + } + + // append 5 characters (size of longest keyword "false") to + // make sure that there is sufficient space between m_cursor + // and m_limit + m_line_buffer.append(5, '\0'); + } + else + { + // delete processed characters from line buffer + m_line_buffer.erase(0, static_cast(offset_start)); + // read next line from input stream + std::string line; + std::getline(*m_stream, line); + // add line with newline symbol to the line buffer + m_line_buffer += "\n" + line; + } // set pointers m_content = reinterpret_cast(m_line_buffer.c_str()); @@ -7877,7 +7896,7 @@ class basic_json m_start = m_content; m_marker = m_start + offset_marker; m_cursor = m_start + offset_cursor; - m_limit = m_start + m_line_buffer.size() - 1; + m_limit = m_start + m_line_buffer.size(); } /// return string representation of last read token diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index 32a6ac8f..6fcf947d 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -761,19 +761,19 @@ TEST_CASE("parser class") { SECTION("from std::vector") { - std::vector v = {'t', 'r', 'u', 'e', '\0'}; + std::vector v = {'t', 'r', 'u', 'e'}; CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true)); } SECTION("from std::array") { - std::array v { {'t', 'r', 'u', 'e', '\0'} }; + std::array v { {'t', 'r', 'u', 'e'} }; CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true)); } SECTION("from array") { - uint8_t v[] = {'t', 'r', 'u', 'e', '\0'}; + uint8_t v[] = {'t', 'r', 'u', 'e'}; CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true)); } @@ -790,13 +790,13 @@ TEST_CASE("parser class") SECTION("from std::initializer_list") { - std::initializer_list v = {'t', 'r', 'u', 'e', '\0'}; + std::initializer_list v = {'t', 'r', 'u', 'e'}; CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true)); } SECTION("from std::valarray") { - std::valarray v = {'t', 'r', 'u', 'e', '\0'}; + std::valarray v = {'t', 'r', 'u', 'e'}; CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true)); } } diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index 6e2c7813..dcd7c272 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -86,19 +86,19 @@ TEST_CASE("deserialization") { SECTION("from std::vector") { - std::vector v = {'t', 'r', 'u', 'e', '\0'}; + std::vector v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(v) == json(true)); } SECTION("from std::array") { - std::array v { {'t', 'r', 'u', 'e', '\0'} }; + std::array v { {'t', 'r', 'u', 'e'} }; CHECK(json::parse(v) == json(true)); } SECTION("from array") { - uint8_t v[] = {'t', 'r', 'u', 'e', '\0'}; + uint8_t v[] = {'t', 'r', 'u', 'e'}; CHECK(json::parse(v) == json(true)); } @@ -110,7 +110,7 @@ TEST_CASE("deserialization") SECTION("from std::initializer_list") { - std::initializer_list v = {'t', 'r', 'u', 'e', '\0'}; + std::initializer_list v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(v) == json(true)); } @@ -125,19 +125,19 @@ TEST_CASE("deserialization") { SECTION("from std::vector") { - std::vector v = {'t', 'r', 'u', 'e', '\0'}; + std::vector v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(std::begin(v), std::end(v)) == json(true)); } SECTION("from std::array") { - std::array v { {'t', 'r', 'u', 'e', '\0'} }; + std::array v { {'t', 'r', 'u', 'e'} }; CHECK(json::parse(std::begin(v), std::end(v)) == json(true)); } SECTION("from array") { - uint8_t v[] = {'t', 'r', 'u', 'e', '\0'}; + uint8_t v[] = {'t', 'r', 'u', 'e'}; CHECK(json::parse(std::begin(v), std::end(v)) == json(true)); } @@ -149,13 +149,13 @@ TEST_CASE("deserialization") SECTION("from std::initializer_list") { - std::initializer_list v = {'t', 'r', 'u', 'e', '\0'}; + std::initializer_list v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(std::begin(v), std::end(v)) == json(true)); } SECTION("from std::valarray") { - std::valarray v = {'t', 'r', 'u', 'e', '\0'}; + std::valarray v = {'t', 'r', 'u', 'e'}; CHECK(json::parse(std::begin(v), std::end(v)) == json(true)); }