diff --git a/include/nlohmann/detail/output/serializer.hpp b/include/nlohmann/detail/output/serializer.hpp index d21cd8fe..f1c0b051 100644 --- a/include/nlohmann/detail/output/serializer.hpp +++ b/include/nlohmann/detail/output/serializer.hpp @@ -302,6 +302,7 @@ class serializer // number of bytes written at the point of the last valid byte std::size_t bytes_after_last_accept = 0; + std::size_t undumped_chars = 0; for (std::size_t i = 0; i < s.size(); ++i) { @@ -403,6 +404,7 @@ class serializer // remember the byte position of this accept bytes_after_last_accept = bytes; + undumped_chars = 0; break; } @@ -424,7 +426,7 @@ class serializer // would like to read it again, because the byte // may be OK for itself, but just not OK for the // previous sequence - if (bytes_after_last_accept != bytes) + if (undumped_chars > 0) { --i; } @@ -454,6 +456,8 @@ class serializer bytes_after_last_accept = bytes; } + undumped_chars = 0; + // continue processing the string state = UTF8_ACCEPT; continue; @@ -468,6 +472,7 @@ class serializer // code point will not be escaped - copy byte to buffer string_buffer[bytes++] = s[i]; } + ++undumped_chars; break; } } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index f1335cd4..025dfeaf 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -10254,6 +10254,7 @@ class serializer // number of bytes written at the point of the last valid byte std::size_t bytes_after_last_accept = 0; + std::size_t undumped_chars = 0; for (std::size_t i = 0; i < s.size(); ++i) { @@ -10355,6 +10356,7 @@ class serializer // remember the byte position of this accept bytes_after_last_accept = bytes; + undumped_chars = 0; break; } @@ -10376,7 +10378,7 @@ class serializer // would like to read it again, because the byte // may be OK for itself, but just not OK for the // previous sequence - if (bytes_after_last_accept != bytes) + if (undumped_chars > 0) { --i; } @@ -10406,6 +10408,8 @@ class serializer bytes_after_last_accept = bytes; } + undumped_chars = 0; + // continue processing the string state = UTF8_ACCEPT; continue; @@ -10420,6 +10424,7 @@ class serializer // code point will not be escaped - copy byte to buffer string_buffer[bytes++] = s[i]; } + ++undumped_chars; break; } } diff --git a/test/src/unit-unicode.cpp b/test/src/unit-unicode.cpp index acc384bd..fe16eb0d 100644 --- a/test/src/unit-unicode.cpp +++ b/test/src/unit-unicode.cpp @@ -76,8 +76,12 @@ void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 // dumping with ignore/replace must not throw in any case auto s_ignored = j.dump(-1, ' ', false, json::error_handler_t::ignore); auto s_ignored2 = j2.dump(-1, ' ', false, json::error_handler_t::ignore); + auto s_ignored_ascii = j.dump(-1, ' ', true, json::error_handler_t::ignore); + auto s_ignored2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::ignore); auto s_replaced = j.dump(-1, ' ', false, json::error_handler_t::replace); auto s_replaced2 = j2.dump(-1, ' ', false, json::error_handler_t::replace); + auto s_replaced_ascii = j.dump(-1, ' ', true, json::error_handler_t::replace); + auto s_replaced2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::replace); if (success_expected) { @@ -102,8 +106,12 @@ void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 // check that prefix and suffix are preserved CHECK(s_ignored2.substr(1, 3) == "abc"); CHECK(s_ignored2.substr(s_ignored2.size() - 4, 3) == "xyz"); + CHECK(s_ignored2_ascii.substr(1, 3) == "abc"); + CHECK(s_ignored2_ascii.substr(s_ignored2_ascii.size() - 4, 3) == "xyz"); CHECK(s_replaced2.substr(1, 3) == "abc"); CHECK(s_replaced2.substr(s_replaced2.size() - 4, 3) == "xyz"); + CHECK(s_replaced2_ascii.substr(1, 3) == "abc"); + CHECK(s_replaced2_ascii.substr(s_replaced2_ascii.size() - 4, 3) == "xyz"); } void check_utf8string(bool success_expected, int byte1, int byte2, int byte3, int byte4);