fixed lexer issue which required null byte at the end of contiguous storage containers #290
This commit is contained in:
parent
a79d634ccb
commit
1d66ab9f7a
4 changed files with 82 additions and 44 deletions
49
src/json.hpp
49
src/json.hpp
|
@@ -8538,6 +8538,13 @@ basic_json_parser_63:
|
||||||
incremented without leaving the limits of the line buffer. Note re2c
|
incremented without leaving the limits of the line buffer. Note re2c
|
||||||
decides when to call this function.
|
decides when to call this function.
|
||||||
|
|
||||||
|
If the lexer reads from contiguous storage, there is no trailing null
|
||||||
|
byte. Therefore, this function must make sure to add these padding
|
||||||
|
null bytes.
|
||||||
|
|
||||||
|
If the lexer reads from an input stream, this function reads the next
|
||||||
|
line of the input.
|
||||||
|
|
||||||
@pre
|
@pre
|
||||||
p p p p p p u u u u u x . . . . . .
|
p p p p p p u u u u u x . . . . . .
|
||||||
^ ^ ^ ^
|
^ ^ ^ ^
|
||||||
|
@@ -8553,26 +8560,38 @@ basic_json_parser_63:
|
||||||
*/
|
*/
|
||||||
void fill_line_buffer()
|
void fill_line_buffer()
|
||||||
{
|
{
|
||||||
// no stream is used or end of file is reached
|
|
||||||
if (m_stream == nullptr or not * m_stream)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// number of processed characters (p)
|
// number of processed characters (p)
|
||||||
const auto offset_start = m_start - m_content;
|
const auto offset_start = m_start - m_content;
|
||||||
// offset for m_marker wrt. to m_start
|
// offset for m_marker wrt. to m_start
|
||||||
const auto offset_marker = m_marker - m_start;
|
const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start;
|
||||||
// number of unprocessed characters (u)
|
// number of unprocessed characters (u)
|
||||||
const auto offset_cursor = m_cursor - m_start;
|
const auto offset_cursor = m_cursor - m_start;
|
||||||
|
|
||||||
// delete processed characters from line buffer
|
// no stream is used or end of file is reached
|
||||||
m_line_buffer.erase(0, static_cast<size_t>(offset_start));
|
if (m_stream == nullptr or not * m_stream)
|
||||||
// read next line from input stream
|
{
|
||||||
std::string line;
|
// copy unprocessed characters to line buffer
|
||||||
std::getline(*m_stream, line);
|
m_line_buffer.clear();
|
||||||
// add line with newline symbol to the line buffer
|
for (m_cursor = m_start; m_cursor != m_limit; ++m_cursor)
|
||||||
m_line_buffer += "\n" + line;
|
{
|
||||||
|
m_line_buffer.append(1, static_cast<const char>(*m_cursor));
|
||||||
|
}
|
||||||
|
|
||||||
|
// append 5 characters (size of longest keyword "false") to
|
||||||
|
// make sure that there is sufficient space between m_cursor
|
||||||
|
// and m_limit
|
||||||
|
m_line_buffer.append(5, '\0');
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// delete processed characters from line buffer
|
||||||
|
m_line_buffer.erase(0, static_cast<size_t>(offset_start));
|
||||||
|
// read next line from input stream
|
||||||
|
std::string line;
|
||||||
|
std::getline(*m_stream, line);
|
||||||
|
// add line with newline symbol to the line buffer
|
||||||
|
m_line_buffer += "\n" + line;
|
||||||
|
}
|
||||||
|
|
||||||
// set pointers
|
// set pointers
|
||||||
m_content = reinterpret_cast<const lexer_char_t*>(m_line_buffer.c_str());
|
m_content = reinterpret_cast<const lexer_char_t*>(m_line_buffer.c_str());
|
||||||
|
@@ -8580,7 +8599,7 @@ basic_json_parser_63:
|
||||||
m_start = m_content;
|
m_start = m_content;
|
||||||
m_marker = m_start + offset_marker;
|
m_marker = m_start + offset_marker;
|
||||||
m_cursor = m_start + offset_cursor;
|
m_cursor = m_start + offset_cursor;
|
||||||
m_limit = m_start + m_line_buffer.size() - 1;
|
m_limit = m_start + m_line_buffer.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// return string representation of last read token
|
/// return string representation of last read token
|
||||||
|
|
|
@@ -7835,6 +7835,13 @@ class basic_json
|
||||||
incremented without leaving the limits of the line buffer. Note re2c
|
incremented without leaving the limits of the line buffer. Note re2c
|
||||||
decides when to call this function.
|
decides when to call this function.
|
||||||
|
|
||||||
|
If the lexer reads from contiguous storage, there is no trailing null
|
||||||
|
byte. Therefore, this function must make sure to add these padding
|
||||||
|
null bytes.
|
||||||
|
|
||||||
|
If the lexer reads from an input stream, this function reads the next
|
||||||
|
line of the input.
|
||||||
|
|
||||||
@pre
|
@pre
|
||||||
p p p p p p u u u u u x . . . . . .
|
p p p p p p u u u u u x . . . . . .
|
||||||
^ ^ ^ ^
|
^ ^ ^ ^
|
||||||
|
@@ -7850,26 +7857,38 @@ class basic_json
|
||||||
*/
|
*/
|
||||||
void fill_line_buffer()
|
void fill_line_buffer()
|
||||||
{
|
{
|
||||||
// no stream is used or end of file is reached
|
|
||||||
if (m_stream == nullptr or not * m_stream)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// number of processed characters (p)
|
// number of processed characters (p)
|
||||||
const auto offset_start = m_start - m_content;
|
const auto offset_start = m_start - m_content;
|
||||||
// offset for m_marker wrt. to m_start
|
// offset for m_marker wrt. to m_start
|
||||||
const auto offset_marker = m_marker - m_start;
|
const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start;
|
||||||
// number of unprocessed characters (u)
|
// number of unprocessed characters (u)
|
||||||
const auto offset_cursor = m_cursor - m_start;
|
const auto offset_cursor = m_cursor - m_start;
|
||||||
|
|
||||||
// delete processed characters from line buffer
|
// no stream is used or end of file is reached
|
||||||
m_line_buffer.erase(0, static_cast<size_t>(offset_start));
|
if (m_stream == nullptr or not * m_stream)
|
||||||
// read next line from input stream
|
{
|
||||||
std::string line;
|
// copy unprocessed characters to line buffer
|
||||||
std::getline(*m_stream, line);
|
m_line_buffer.clear();
|
||||||
// add line with newline symbol to the line buffer
|
for (m_cursor = m_start; m_cursor != m_limit; ++m_cursor)
|
||||||
m_line_buffer += "\n" + line;
|
{
|
||||||
|
m_line_buffer.append(1, static_cast<const char>(*m_cursor));
|
||||||
|
}
|
||||||
|
|
||||||
|
// append 5 characters (size of longest keyword "false") to
|
||||||
|
// make sure that there is sufficient space between m_cursor
|
||||||
|
// and m_limit
|
||||||
|
m_line_buffer.append(5, '\0');
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// delete processed characters from line buffer
|
||||||
|
m_line_buffer.erase(0, static_cast<size_t>(offset_start));
|
||||||
|
// read next line from input stream
|
||||||
|
std::string line;
|
||||||
|
std::getline(*m_stream, line);
|
||||||
|
// add line with newline symbol to the line buffer
|
||||||
|
m_line_buffer += "\n" + line;
|
||||||
|
}
|
||||||
|
|
||||||
// set pointers
|
// set pointers
|
||||||
m_content = reinterpret_cast<const lexer_char_t*>(m_line_buffer.c_str());
|
m_content = reinterpret_cast<const lexer_char_t*>(m_line_buffer.c_str());
|
||||||
|
@@ -7877,7 +7896,7 @@ class basic_json
|
||||||
m_start = m_content;
|
m_start = m_content;
|
||||||
m_marker = m_start + offset_marker;
|
m_marker = m_start + offset_marker;
|
||||||
m_cursor = m_start + offset_cursor;
|
m_cursor = m_start + offset_cursor;
|
||||||
m_limit = m_start + m_line_buffer.size() - 1;
|
m_limit = m_start + m_line_buffer.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// return string representation of last read token
|
/// return string representation of last read token
|
||||||
|
|
|
@@ -761,19 +761,19 @@ TEST_CASE("parser class")
|
||||||
{
|
{
|
||||||
SECTION("from std::vector")
|
SECTION("from std::vector")
|
||||||
{
|
{
|
||||||
std::vector<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
|
std::vector<uint8_t> v = {'t', 'r', 'u', 'e'};
|
||||||
CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
|
CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
|
||||||
}
|
}
|
||||||
|
|
||||||
SECTION("from std::array")
|
SECTION("from std::array")
|
||||||
{
|
{
|
||||||
std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e', '\0'} };
|
std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e'} };
|
||||||
CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
|
CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
|
||||||
}
|
}
|
||||||
|
|
||||||
SECTION("from array")
|
SECTION("from array")
|
||||||
{
|
{
|
||||||
uint8_t v[] = {'t', 'r', 'u', 'e', '\0'};
|
uint8_t v[] = {'t', 'r', 'u', 'e'};
|
||||||
CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
|
CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -790,13 +790,13 @@ TEST_CASE("parser class")
|
||||||
|
|
||||||
SECTION("from std::initializer_list")
|
SECTION("from std::initializer_list")
|
||||||
{
|
{
|
||||||
std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
|
std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e'};
|
||||||
CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
|
CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
|
||||||
}
|
}
|
||||||
|
|
||||||
SECTION("from std::valarray")
|
SECTION("from std::valarray")
|
||||||
{
|
{
|
||||||
std::valarray<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
|
std::valarray<uint8_t> v = {'t', 'r', 'u', 'e'};
|
||||||
CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
|
CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@@ -86,19 +86,19 @@ TEST_CASE("deserialization")
|
||||||
{
|
{
|
||||||
SECTION("from std::vector")
|
SECTION("from std::vector")
|
||||||
{
|
{
|
||||||
std::vector<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
|
std::vector<uint8_t> v = {'t', 'r', 'u', 'e'};
|
||||||
CHECK(json::parse(v) == json(true));
|
CHECK(json::parse(v) == json(true));
|
||||||
}
|
}
|
||||||
|
|
||||||
SECTION("from std::array")
|
SECTION("from std::array")
|
||||||
{
|
{
|
||||||
std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e', '\0'} };
|
std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e'} };
|
||||||
CHECK(json::parse(v) == json(true));
|
CHECK(json::parse(v) == json(true));
|
||||||
}
|
}
|
||||||
|
|
||||||
SECTION("from array")
|
SECTION("from array")
|
||||||
{
|
{
|
||||||
uint8_t v[] = {'t', 'r', 'u', 'e', '\0'};
|
uint8_t v[] = {'t', 'r', 'u', 'e'};
|
||||||
CHECK(json::parse(v) == json(true));
|
CHECK(json::parse(v) == json(true));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -110,7 +110,7 @@ TEST_CASE("deserialization")
|
||||||
|
|
||||||
SECTION("from std::initializer_list")
|
SECTION("from std::initializer_list")
|
||||||
{
|
{
|
||||||
std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
|
std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e'};
|
||||||
CHECK(json::parse(v) == json(true));
|
CHECK(json::parse(v) == json(true));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -125,19 +125,19 @@ TEST_CASE("deserialization")
|
||||||
{
|
{
|
||||||
SECTION("from std::vector")
|
SECTION("from std::vector")
|
||||||
{
|
{
|
||||||
std::vector<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
|
std::vector<uint8_t> v = {'t', 'r', 'u', 'e'};
|
||||||
CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
|
CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
|
||||||
}
|
}
|
||||||
|
|
||||||
SECTION("from std::array")
|
SECTION("from std::array")
|
||||||
{
|
{
|
||||||
std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e', '\0'} };
|
std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e'} };
|
||||||
CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
|
CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
|
||||||
}
|
}
|
||||||
|
|
||||||
SECTION("from array")
|
SECTION("from array")
|
||||||
{
|
{
|
||||||
uint8_t v[] = {'t', 'r', 'u', 'e', '\0'};
|
uint8_t v[] = {'t', 'r', 'u', 'e'};
|
||||||
CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
|
CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -149,13 +149,13 @@ TEST_CASE("deserialization")
|
||||||
|
|
||||||
SECTION("from std::initializer_list")
|
SECTION("from std::initializer_list")
|
||||||
{
|
{
|
||||||
std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
|
std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e'};
|
||||||
CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
|
CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
|
||||||
}
|
}
|
||||||
|
|
||||||
SECTION("from std::valarray")
|
SECTION("from std::valarray")
|
||||||
{
|
{
|
||||||
std::valarray<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
|
std::valarray<uint8_t> v = {'t', 'r', 'u', 'e'};
|
||||||
CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
|
CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue