diff --git a/README.md b/README.md index d1d1f866..ff17f3d4 100644 --- a/README.md +++ b/README.md @@ -500,7 +500,7 @@ I deeply appreciate the help of the following people. - [Vladimir Petrigo](https://github.com/vpetrigo) made a SFINAE hack more readable. - [Denis Andrejew](https://github.com/seeekr) fixed a grammar issue in the README file. - [Pierre-Antoine Lacaze](https://github.com/palacaze) found a subtle bug in the `dump()` function. -- [TurpentineDistillery](https://github.com/TurpentineDistillery) pointed to [`std::locale::classic()`](http://en.cppreference.com/w/cpp/locale/locale/classic) to avoid too much locale joggling and found some nice performance improvements in the parser. +- [TurpentineDistillery](https://github.com/TurpentineDistillery) pointed to [`std::locale::classic()`](http://en.cppreference.com/w/cpp/locale/locale/classic) to avoid too much locale juggling, found some nice performance improvements in the parser and improved the benchmarking code. Thanks a lot for helping out! 
diff --git a/benchmarks/benchmarks.cpp b/benchmarks/benchmarks.cpp index efb26cf2..745123c9 100644 --- a/benchmarks/benchmarks.cpp +++ b/benchmarks/benchmarks.cpp @@ -58,7 +58,7 @@ static void bench(benchpress::context& ctx, for (size_t i = 0; i < ctx.num_iterations(); ++i) { - // clear flags and rewind + // clear flags and rewind istr.clear(); istr.seekg(0); json j; diff --git a/src/json.hpp b/src/json.hpp index 8704134a..0f6c3c9c 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -8719,8 +8719,22 @@ basic_json_parser_66: */ void fill_line_buffer(size_t n = 0) { + // if line buffer is used, m_content points to its data + assert(m_line_buffer.empty() + or m_content == reinterpret_cast(m_line_buffer.data())); + + // if line buffer is used, m_limit is set past the end of its data + assert(m_line_buffer.empty() + or m_limit == m_content + m_line_buffer.size()); + + // pointer relationships + assert(m_content <= m_start); + assert(m_start <= m_cursor); + assert(m_cursor <= m_limit); + assert(m_marker == nullptr or m_marker <= m_limit); + // number of processed characters (p) - const auto offset_start = m_start - m_content; + const size_t num_processed_chars = static_cast(m_start - m_content); // offset for m_marker wrt. to m_start const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start; // number of unprocessed characters (u) @@ -8729,23 +8743,23 @@ basic_json_parser_66: // no stream is used or end of file is reached if (m_stream == nullptr or m_stream->eof()) { - // skip this part if we are already using the line buffer - if (m_start != reinterpret_cast(m_line_buffer.data())) - { - // copy unprocessed characters to line buffer - m_line_buffer.assign(m_start, m_limit); - m_cursor = m_limit; - } + // m_start may or may not be pointing into m_line_buffer at + // this point. We trust the standard library to do the right + // thing. 
See http://stackoverflow.com/q/28142011/266378 + m_line_buffer.assign(m_start, m_limit); // append n characters to make sure that there is sufficient // space between m_cursor and m_limit m_line_buffer.append(1, '\x00'); - m_line_buffer.append(n - 1, '\x01'); + if (n > 0) + { + m_line_buffer.append(n - 1, '\x01'); + } } else { // delete processed characters from line buffer - m_line_buffer.erase(0, static_cast(offset_start)); + m_line_buffer.erase(0, num_processed_chars); // read next line from input stream m_line_buffer_tmp.clear(); std::getline(*m_stream, m_line_buffer_tmp, '\n'); @@ -8756,7 +8770,7 @@ basic_json_parser_66: } // set pointers - m_content = reinterpret_cast(m_line_buffer.c_str()); + m_content = reinterpret_cast(m_line_buffer.data()); assert(m_content != nullptr); m_start = m_content; m_marker = m_start + offset_marker; @@ -8843,7 +8857,11 @@ basic_json_parser_66: auto e = std::find(i, m_cursor - 1, '\\'); if (e != i) { - result.append(i, e); + // see https://github.com/nlohmann/json/issues/365#issuecomment-262874705 + for (auto k = i; k < e; k++) + { + result.push_back(static_cast(*k)); + } i = e - 1; // -1 because of ++i } else diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index 389adbc6..c83cd436 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -7868,8 +7868,22 @@ class basic_json */ void fill_line_buffer(size_t n = 0) { + // if line buffer is used, m_content points to its data + assert(m_line_buffer.empty() + or m_content == reinterpret_cast(m_line_buffer.data())); + + // if line buffer is used, m_limit is set past the end of its data + assert(m_line_buffer.empty() + or m_limit == m_content + m_line_buffer.size()); + + // pointer relationships + assert(m_content <= m_start); + assert(m_start <= m_cursor); + assert(m_cursor <= m_limit); + assert(m_marker == nullptr or m_marker <= m_limit); + // number of processed characters (p) - const auto offset_start = m_start - m_content; + const size_t num_processed_chars = static_cast(m_start 
- m_content); // offset for m_marker wrt. to m_start const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start; // number of unprocessed characters (u) @@ -7878,23 +7892,23 @@ class basic_json // no stream is used or end of file is reached if (m_stream == nullptr or m_stream->eof()) { - // skip this part if we are already using the line buffer - if (m_start != reinterpret_cast(m_line_buffer.data())) - { - // copy unprocessed characters to line buffer - m_line_buffer.assign(m_start, m_limit); - m_cursor = m_limit; - } + // m_start may or may not be pointing into m_line_buffer at + // this point. We trust the standard library to do the right + // thing. See http://stackoverflow.com/q/28142011/266378 + m_line_buffer.assign(m_start, m_limit); // append n characters to make sure that there is sufficient // space between m_cursor and m_limit m_line_buffer.append(1, '\x00'); - m_line_buffer.append(n - 1, '\x01'); + if (n > 0) + { + m_line_buffer.append(n - 1, '\x01'); + } } else { // delete processed characters from line buffer - m_line_buffer.erase(0, static_cast(offset_start)); + m_line_buffer.erase(0, num_processed_chars); // read next line from input stream m_line_buffer_tmp.clear(); std::getline(*m_stream, m_line_buffer_tmp, '\n'); @@ -7905,7 +7919,7 @@ class basic_json } // set pointers - m_content = reinterpret_cast(m_line_buffer.c_str()); + m_content = reinterpret_cast(m_line_buffer.data()); assert(m_content != nullptr); m_start = m_content; m_marker = m_start + offset_marker; @@ -7992,7 +8006,11 @@ class basic_json auto e = std::find(i, m_cursor - 1, '\\'); if (e != i) { - result.append(i, e); + // see https://github.com/nlohmann/json/issues/365#issuecomment-262874705 + for (auto k = i; k < e; k++) + { + result.push_back(static_cast(*k)); + } i = e - 1; // -1 because of ++i } else