diff --git a/include/nlohmann/detail/output/serializer.hpp b/include/nlohmann/detail/output/serializer.hpp index c022c307..d21cd8fe 100644 --- a/include/nlohmann/detail/output/serializer.hpp +++ b/include/nlohmann/detail/output/serializer.hpp @@ -436,12 +436,21 @@ class serializer if (error_handler == error_handler_t::replace) { // add a replacement character - string_buffer[bytes++] = '\\'; - string_buffer[bytes++] = 'u'; - string_buffer[bytes++] = 'f'; - string_buffer[bytes++] = 'f'; - string_buffer[bytes++] = 'f'; - string_buffer[bytes++] = 'd'; + if (ensure_ascii) + { + string_buffer[bytes++] = '\\'; + string_buffer[bytes++] = 'u'; + string_buffer[bytes++] = 'f'; + string_buffer[bytes++] = 'f'; + string_buffer[bytes++] = 'f'; + string_buffer[bytes++] = 'd'; + } + else + { + string_buffer[bytes++] = '\xEF'; + string_buffer[bytes++] = '\xBF'; + string_buffer[bytes++] = '\xBD'; + } bytes_after_last_accept = bytes; } @@ -497,7 +506,14 @@ class serializer // write all accepted bytes o->write_characters(string_buffer.data(), bytes_after_last_accept); // add a replacement character - o->write_characters("\\ufffd", 6); + if (ensure_ascii) + { + o->write_characters("\\ufffd", 6); + } + else + { + o->write_characters("\xEF\xBF\xBD", 3); + } break; } } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 4e86fafc..f1335cd4 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -10388,12 +10388,21 @@ class serializer if (error_handler == error_handler_t::replace) { // add a replacement character - string_buffer[bytes++] = '\\'; - string_buffer[bytes++] = 'u'; - string_buffer[bytes++] = 'f'; - string_buffer[bytes++] = 'f'; - string_buffer[bytes++] = 'f'; - string_buffer[bytes++] = 'd'; + if (ensure_ascii) + { + string_buffer[bytes++] = '\\'; + string_buffer[bytes++] = 'u'; + string_buffer[bytes++] = 'f'; + string_buffer[bytes++] = 'f'; + string_buffer[bytes++] = 'f'; + string_buffer[bytes++] = 'd'; + } + else + { + string_buffer[bytes++] = '\xEF'; + string_buffer[bytes++] = '\xBF'; + string_buffer[bytes++] = '\xBD'; + } bytes_after_last_accept = bytes; } @@ -10449,7 +10458,14 @@ class serializer // write all accepted bytes o->write_characters(string_buffer.data(), bytes_after_last_accept); // add a replacement character - o->write_characters("\\ufffd", 6); + if (ensure_ascii) + { + o->write_characters("\\ufffd", 6); + } + else + { + o->write_characters("\xEF\xBF\xBD", 3); + } break; } } diff --git a/test/src/unit-unicode.cpp b/test/src/unit-unicode.cpp index c71f6f1d..acc384bd 100644 --- a/test/src/unit-unicode.cpp +++ b/test/src/unit-unicode.cpp @@ -86,10 +86,6 @@ void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 // all dumps should agree on the string CHECK(s_strict == s_ignored); CHECK(s_strict == s_replaced); - - // check that ignore/replace string does not contain a replacement character - CHECK(s_ignored.find("\\ufffd") == std::string::npos); - CHECK(s_replaced.find("\\ufffd") == std::string::npos); } else { @@ -98,10 +94,8 @@ void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 // ignore and replace must create different dumps CHECK(s_ignored != s_replaced); - // check that ignore string does not contain a replacement character - CHECK(s_ignored.find("\\ufffd") == std::string::npos); // check that replace string contains a replacement character - CHECK(s_replaced.find("\\ufffd") != std::string::npos); + CHECK(s_replaced.find("\xEF\xBF\xBD") != std::string::npos); }