🚧 fixed an issue with ensure_ascii #1198

2018-10-22 15:53:36 +02:00 · 2018-10-22 15:53:36 +02:00 · c51b1e6fab
commit c51b1e6fab
parent c7af027cbb
3 changed files with 20 additions and 2 deletions
--- a/include/nlohmann/detail/output/serializer.hpp
+++ b/include/nlohmann/detail/output/serializer.hpp
@@ -302,6 +302,7 @@ class serializer

        // number of bytes written at the point of the last valid byte
        std::size_t bytes_after_last_accept = 0;
+        std::size_t undumped_chars = 0;

        for (std::size_t i = 0; i < s.size(); ++i)
        {
@@ -403,6 +404,7 @@ class serializer

                    // remember the byte position of this accept
                    bytes_after_last_accept = bytes;
+                    undumped_chars = 0;
                    break;
                }

@@ -424,7 +426,7 @@ class serializer
                            // would like to read it again, because the byte
                            // may be OK for itself, but just not OK for the
                            // previous sequence
-                            if (bytes_after_last_accept != bytes)
+                            if (undumped_chars > 0)
                            {
                                --i;
                            }
@@ -454,6 +456,8 @@ class serializer
                                bytes_after_last_accept = bytes;
                            }

+                            undumped_chars = 0;
+
                            // continue processing the string
                            state = UTF8_ACCEPT;
                            continue;
@@ -468,6 +472,7 @@ class serializer
                        // code point will not be escaped - copy byte to buffer
                        string_buffer[bytes++] = s[i];
                    }
+                    ++undumped_chars;
                    break;
                }
            }
--- a/single_include/nlohmann/json.hpp
+++ b/single_include/nlohmann/json.hpp
@@ -10254,6 +10254,7 @@ class serializer

        // number of bytes written at the point of the last valid byte
        std::size_t bytes_after_last_accept = 0;
+        std::size_t undumped_chars = 0;

        for (std::size_t i = 0; i < s.size(); ++i)
        {
@@ -10355,6 +10356,7 @@ class serializer

                    // remember the byte position of this accept
                    bytes_after_last_accept = bytes;
+                    undumped_chars = 0;
                    break;
                }

@@ -10376,7 +10378,7 @@ class serializer
                            // would like to read it again, because the byte
                            // may be OK for itself, but just not OK for the
                            // previous sequence
-                            if (bytes_after_last_accept != bytes)
+                            if (undumped_chars > 0)
                            {
                                --i;
                            }
@@ -10406,6 +10408,8 @@ class serializer
                                bytes_after_last_accept = bytes;
                            }

+                            undumped_chars = 0;
+
                            // continue processing the string
                            state = UTF8_ACCEPT;
                            continue;
@@ -10420,6 +10424,7 @@ class serializer
                        // code point will not be escaped - copy byte to buffer
                        string_buffer[bytes++] = s[i];
                    }
+                    ++undumped_chars;
                    break;
                }
            }
--- a/test/src/unit-unicode.cpp
+++ b/test/src/unit-unicode.cpp
@@ -76,8 +76,12 @@ void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3
    // dumping with ignore/replace must not throw in any case
    auto s_ignored = j.dump(-1, ' ', false, json::error_handler_t::ignore);
    auto s_ignored2 = j2.dump(-1, ' ', false, json::error_handler_t::ignore);
+    auto s_ignored_ascii = j.dump(-1, ' ', true, json::error_handler_t::ignore);
+    auto s_ignored2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::ignore);
    auto s_replaced = j.dump(-1, ' ', false, json::error_handler_t::replace);
    auto s_replaced2 = j2.dump(-1, ' ', false, json::error_handler_t::replace);
+    auto s_replaced_ascii = j.dump(-1, ' ', true, json::error_handler_t::replace);
+    auto s_replaced2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::replace);

    if (success_expected)
    {
@@ -102,8 +106,12 @@ void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3
    // check that prefix and suffix are preserved
    CHECK(s_ignored2.substr(1, 3) == "abc");
    CHECK(s_ignored2.substr(s_ignored2.size() - 4, 3) == "xyz");
+    CHECK(s_ignored2_ascii.substr(1, 3) == "abc");
+    CHECK(s_ignored2_ascii.substr(s_ignored2_ascii.size() - 4, 3) == "xyz");
    CHECK(s_replaced2.substr(1, 3) == "abc");
    CHECK(s_replaced2.substr(s_replaced2.size() - 4, 3) == "xyz");
+    CHECK(s_replaced2_ascii.substr(1, 3) == "abc");
+    CHECK(s_replaced2_ascii.substr(s_replaced2_ascii.size() - 4, 3) == "xyz");
 }

 void check_utf8string(bool success_expected, int byte1, int byte2, int byte3, int byte4);