fixed lexer issue which required null byte at the end of contiguous storage containers #290

Author: Niels
Date:   2016-08-23 22:38:05 +02:00
Parent: a79d634ccb
Commit: 1d66ab9f7a

4 changed files with 82 additions and 44 deletions


@@ -8538,6 +8538,13 @@ basic_json_parser_63:
     incremented without leaving the limits of the line buffer. Note re2c
     decides when to call this function.
+
+    If the lexer reads from contiguous storage, there is no trailing null
+    byte. Therefore, this function must make sure to add these padding
+    null bytes.
+
+    If the lexer reads from an input stream, this function reads the next
+    line of the input.
 
     @pre
         p p p p p p u u u u u x . . . . . .
         ^ ^ ^ ^
@@ -8553,26 +8560,38 @@ basic_json_parser_63:
     */
     void fill_line_buffer()
     {
-        // no stream is used or end of file is reached
-        if (m_stream == nullptr or not * m_stream)
-        {
-            return;
-        }
-
         // number of processed characters (p)
         const auto offset_start = m_start - m_content;
         // offset for m_marker wrt. to m_start
-        const auto offset_marker = m_marker - m_start;
+        const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start;
         // number of unprocessed characters (u)
         const auto offset_cursor = m_cursor - m_start;
 
-        // delete processed characters from line buffer
-        m_line_buffer.erase(0, static_cast<size_t>(offset_start));
-        // read next line from input stream
-        std::string line;
-        std::getline(*m_stream, line);
-
-        // add line with newline symbol to the line buffer
-        m_line_buffer += "\n" + line;
+        // no stream is used or end of file is reached
+        if (m_stream == nullptr or not * m_stream)
+        {
+            // copy unprocessed characters to line buffer
+            m_line_buffer.clear();
+            for (m_cursor = m_start; m_cursor != m_limit; ++m_cursor)
+            {
+                m_line_buffer.append(1, static_cast<const char>(*m_cursor));
+            }
+
+            // append 5 characters (size of longest keyword "false") to
+            // make sure that there is sufficient space between m_cursor
+            // and m_limit
+            m_line_buffer.append(5, '\0');
+        }
+        else
+        {
+            // delete processed characters from line buffer
+            m_line_buffer.erase(0, static_cast<size_t>(offset_start));
+            // read next line from input stream
+            std::string line;
+            std::getline(*m_stream, line);
+
+            // add line with newline symbol to the line buffer
+            m_line_buffer += "\n" + line;
+        }
 
         // set pointers
         m_content = reinterpret_cast<const lexer_char_t*>(m_line_buffer.c_str());
@@ -8580,7 +8599,7 @@ basic_json_parser_63:
         m_start  = m_content;
         m_marker = m_start + offset_marker;
         m_cursor = m_start + offset_cursor;
-        m_limit  = m_start + m_line_buffer.size() - 1;
+        m_limit  = m_start + m_line_buffer.size();
     }
 
     /// return string representation of last read token
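
The new contiguous-storage branch copies the unprocessed characters into the lexer's own line buffer and appends five null bytes ("false" being the longest keyword the re2c scanner may look ahead for), so callers no longer have to null-terminate their containers. A minimal standalone sketch of that padding idea; make_padded_buffer is a hypothetical helper used only for illustration, not part of the library:

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <vector>

    // Copy the not-yet-processed bytes [start, limit) into an owned buffer
    // and pad with five '\0' bytes, so the scanner can look ahead past the
    // last real character without leaving the buffer.
    std::string make_padded_buffer(const std::uint8_t* start, const std::uint8_t* limit)
    {
        std::string buffer;
        for (auto p = start; p != limit; ++p)
        {
            buffer.append(1, static_cast<char>(*p));
        }
        buffer.append(5, '\0');  // padding instead of a caller-supplied '\0'
        return buffer;
    }

    int main()
    {
        // container without a trailing null byte, as in the updated tests
        std::vector<std::uint8_t> v = {'t', 'r', 'u', 'e'};
        const std::string buf = make_padded_buffer(v.data(), v.data() + v.size());
        std::cout << buf.size() << '\n';  // 9: 4 payload bytes + 5 padding bytes
    }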


@@ -7835,6 +7835,13 @@ class basic_json
     incremented without leaving the limits of the line buffer. Note re2c
     decides when to call this function.
+
+    If the lexer reads from contiguous storage, there is no trailing null
+    byte. Therefore, this function must make sure to add these padding
+    null bytes.
+
+    If the lexer reads from an input stream, this function reads the next
+    line of the input.
 
     @pre
         p p p p p p u u u u u x . . . . . .
         ^ ^ ^ ^
@@ -7850,26 +7857,38 @@ class basic_json
     */
     void fill_line_buffer()
    {
-        // no stream is used or end of file is reached
-        if (m_stream == nullptr or not * m_stream)
-        {
-            return;
-        }
-
         // number of processed characters (p)
         const auto offset_start = m_start - m_content;
         // offset for m_marker wrt. to m_start
-        const auto offset_marker = m_marker - m_start;
+        const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start;
         // number of unprocessed characters (u)
         const auto offset_cursor = m_cursor - m_start;
 
-        // delete processed characters from line buffer
-        m_line_buffer.erase(0, static_cast<size_t>(offset_start));
-        // read next line from input stream
-        std::string line;
-        std::getline(*m_stream, line);
-
-        // add line with newline symbol to the line buffer
-        m_line_buffer += "\n" + line;
+        // no stream is used or end of file is reached
+        if (m_stream == nullptr or not * m_stream)
+        {
+            // copy unprocessed characters to line buffer
+            m_line_buffer.clear();
+            for (m_cursor = m_start; m_cursor != m_limit; ++m_cursor)
+            {
+                m_line_buffer.append(1, static_cast<const char>(*m_cursor));
+            }
+
+            // append 5 characters (size of longest keyword "false") to
+            // make sure that there is sufficient space between m_cursor
+            // and m_limit
+            m_line_buffer.append(5, '\0');
+        }
+        else
+        {
+            // delete processed characters from line buffer
+            m_line_buffer.erase(0, static_cast<size_t>(offset_start));
+            // read next line from input stream
+            std::string line;
+            std::getline(*m_stream, line);
+
+            // add line with newline symbol to the line buffer
+            m_line_buffer += "\n" + line;
+        }
 
         // set pointers
         m_content = reinterpret_cast<const lexer_char_t*>(m_line_buffer.c_str());
@@ -7877,7 +7896,7 @@ class basic_json
         m_start  = m_content;
         m_marker = m_start + offset_marker;
         m_cursor = m_start + offset_cursor;
-        m_limit  = m_start + m_line_buffer.size() - 1;
+        m_limit  = m_start + m_line_buffer.size();
     }
 
     /// return string representation of last read token


@@ -761,19 +761,19 @@ TEST_CASE("parser class")
     {
         SECTION("from std::vector")
         {
-            std::vector<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::vector<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
         }
 
         SECTION("from std::array")
         {
-            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e', '\0'} };
+            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e'} };
             CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
         }
 
         SECTION("from array")
         {
-            uint8_t v[] = {'t', 'r', 'u', 'e', '\0'};
+            uint8_t v[] = {'t', 'r', 'u', 'e'};
             CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
         }
@@ -790,13 +790,13 @@ TEST_CASE("parser class")
         SECTION("from std::initializer_list")
         {
-            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
         }
 
         SECTION("from std::valarray")
         {
-            std::valarray<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::valarray<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
         }
     }
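
These parser tests now feed containers that hold only the JSON payload, with no terminating null byte. A small usage sketch of the same iterator-range interface exercised above, assuming the single-header json.hpp is on the include path (the array payload is illustrative):

    #include <cstdint>
    #include <iostream>
    #include <vector>
    #include "json.hpp"

    using json = nlohmann::json;

    int main()
    {
        // JSON payload without a trailing '\0', as in the updated test cases
        std::vector<std::uint8_t> v = {'[', '1', ',', '2', ',', '3', ']'};

        // same iterator-range call the tests above exercise
        json j = json::parser(std::begin(v), std::end(v)).parse();
        std::cout << j.size() << '\n';  // 3
    }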


@@ -86,19 +86,19 @@ TEST_CASE("deserialization")
     {
         SECTION("from std::vector")
         {
-            std::vector<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::vector<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parse(v) == json(true));
         }
 
         SECTION("from std::array")
         {
-            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e', '\0'} };
+            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e'} };
             CHECK(json::parse(v) == json(true));
         }
 
         SECTION("from array")
         {
-            uint8_t v[] = {'t', 'r', 'u', 'e', '\0'};
+            uint8_t v[] = {'t', 'r', 'u', 'e'};
             CHECK(json::parse(v) == json(true));
         }
@@ -110,7 +110,7 @@ TEST_CASE("deserialization")
         SECTION("from std::initializer_list")
         {
-            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parse(v) == json(true));
         }
@@ -125,19 +125,19 @@ TEST_CASE("deserialization")
     {
         SECTION("from std::vector")
         {
-            std::vector<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::vector<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
         }
 
        SECTION("from std::array")
        {
-            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e', '\0'} };
+            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e'} };
             CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
         }
 
         SECTION("from array")
         {
-            uint8_t v[] = {'t', 'r', 'u', 'e', '\0'};
+            uint8_t v[] = {'t', 'r', 'u', 'e'};
             CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
         }
@@ -149,13 +149,13 @@ TEST_CASE("deserialization")
         SECTION("from std::initializer_list")
         {
-            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
         }
 
         SECTION("from std::valarray")
         {
-            std::valarray<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::valarray<uint8_t> v = {'t', 'r', 'u', 'e'};
             CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
         }
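
The deserialization tests exercise both the whole-container overload json::parse(v) and the iterator-range overload without a sentinel '\0'. A brief usage sketch of the container overload on raw bytes, assuming the single-header json.hpp is available (the buffer contents are illustrative):

    #include <cstdint>
    #include <iostream>
    #include <vector>
    #include "json.hpp"

    using json = nlohmann::json;

    int main()
    {
        // raw JSON bytes as they might arrive from a file or socket read;
        // note there is no terminating '\0' at the end any more
        std::vector<std::uint8_t> buf = {'{', '"', 'o', 'k', '"', ':', 't', 'r', 'u', 'e', '}'};

        // whole-container overload, as exercised by the tests above
        json j = json::parse(buf);
        std::cout << std::boolalpha << j["ok"].get<bool>() << '\n';  // true
    }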