🚧 proposal for different error handlers #1198

Proof of concept; currently only as parameter to the internal dump_escaped function; that is, not yet exposed to the dump function.
2018-10-16 20:38:50 +02:00 · 2018-10-16 20:38:50 +02:00 · 0671e92ced
commit 0671e92ced
parent dd672939a0
2 changed files with 120 additions and 14 deletions
--- a/include/nlohmann/detail/output/serializer.hpp
+++ b/include/nlohmann/detail/output/serializer.hpp
@ -39,6 +39,14 @@ class serializer
    static constexpr uint8_t UTF8_REJECT = 1;
  public:
    /// how to treat decoding errors
    enum class error_handler_t
    {
        strict,  ///< throw a type_error exception in case of invalid UTF-8
        replace, ///< replace invalid UTF-8 sequences with U+FFFD
        ignore   ///< ignore invalid UTF-8 sequences
    };
    /*!
    @param[in] s  output stream to serialize to
    @param[in] ichar  indentation character to use
@ -278,10 +286,12 @@ class serializer
    @param[in] s  the string to escape
    @param[in] ensure_ascii  whether to escape non-ASCII characters with
                             \uXXXX sequences
    @param[in] error_handler how to react on decoding errors
    @complexity Linear in the length of string @a s.
    */
-    void dump_escaped(const string_t& s, const bool ensure_ascii)
+    void dump_escaped(const string_t& s, const bool ensure_ascii,
                      const error_handler_t error_handler = error_handler_t::strict)
    {
        uint32_t codepoint;
        uint8_t state = UTF8_ACCEPT;
@ -388,12 +398,36 @@ class serializer
                }
                case UTF8_REJECT:  // decode found invalid UTF-8 byte
                {
                    switch (error_handler)
                    {
                        case error_handler_t::strict:
                        {
                            std::string sn(3, '\0');
                            snprintf(&sn[0], sn.size(), "%.2X", byte);
                            JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + sn));
                        }
                        case error_handler_t::ignore:
                        {
                            state = UTF8_ACCEPT;
                            continue;
                        }
                        case error_handler_t::replace:
                        {
                            string_buffer[bytes++] = '\\';
                            string_buffer[bytes++] = 'u';
                            string_buffer[bytes++] = 'f';
                            string_buffer[bytes++] = 'f';
                            string_buffer[bytes++] = 'f';
                            string_buffer[bytes++] = 'd';
                            state = UTF8_ACCEPT;
                            continue;
                        }
                    }
                }
                default:  // decode found yet incomplete multi-byte code point
                {
                    if (not ensure_ascii)
@ -417,10 +451,29 @@ class serializer
        else
        {
            // we finish reading, but do not accept: string was incomplete
            switch (error_handler)
            {
                case error_handler_t::strict:
                {
                    std::string sn(3, '\0');
                    snprintf(&sn[0], sn.size(), "%.2X", static_cast<uint8_t>(s.back()));
                    JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + sn));
                }
                case error_handler_t::ignore:
                {
                    break;
                }
                case error_handler_t::replace:
                {
                    // write buffer, but replace last byte
                    o->write_characters(string_buffer.data(), bytes - 1);
                    o->write_characters("\\ufffd", 6);
                    break;
                }
            }
        }
    }
    /*!
--- a/single_include/nlohmann/json.hpp
+++ b/single_include/nlohmann/json.hpp
@ -9991,6 +9991,14 @@ class serializer
    static constexpr uint8_t UTF8_REJECT = 1;
  public:
    /// how to treat decoding errors
    enum class error_handler_t
    {
        strict,  ///< throw a type_error exception in case of invalid UTF-8
        replace, ///< replace invalid UTF-8 sequences with U+FFFD
        ignore   ///< ignore invalid UTF-8 sequences
    };
    /*!
    @param[in] s  output stream to serialize to
    @param[in] ichar  indentation character to use
@ -10230,10 +10238,12 @@ class serializer
    @param[in] s  the string to escape
    @param[in] ensure_ascii  whether to escape non-ASCII characters with
                             \uXXXX sequences
    @param[in] error_handler how to react on decoding errors
    @complexity Linear in the length of string @a s.
    */
-    void dump_escaped(const string_t& s, const bool ensure_ascii)
+    void dump_escaped(const string_t& s, const bool ensure_ascii,
                      const error_handler_t error_handler = error_handler_t::strict)
    {
        uint32_t codepoint;
        uint8_t state = UTF8_ACCEPT;
@ -10340,12 +10350,36 @@ class serializer
                }
                case UTF8_REJECT:  // decode found invalid UTF-8 byte
                {
                    switch (error_handler)
                    {
                        case error_handler_t::strict:
                        {
                            std::string sn(3, '\0');
                            snprintf(&sn[0], sn.size(), "%.2X", byte);
                            JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + sn));
                        }
                        case error_handler_t::ignore:
                        {
                            state = UTF8_ACCEPT;
                            continue;
                        }
                        case error_handler_t::replace:
                        {
                            string_buffer[bytes++] = '\\';
                            string_buffer[bytes++] = 'u';
                            string_buffer[bytes++] = 'f';
                            string_buffer[bytes++] = 'f';
                            string_buffer[bytes++] = 'f';
                            string_buffer[bytes++] = 'd';
                            state = UTF8_ACCEPT;
                            continue;
                        }
                    }
                }
                default:  // decode found yet incomplete multi-byte code point
                {
                    if (not ensure_ascii)
@ -10369,10 +10403,29 @@ class serializer
        else
        {
            // we finish reading, but do not accept: string was incomplete
            switch (error_handler)
            {
                case error_handler_t::strict:
                {
                    std::string sn(3, '\0');
                    snprintf(&sn[0], sn.size(), "%.2X", static_cast<uint8_t>(s.back()));
                    JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + sn));
                }
                case error_handler_t::ignore:
                {
                    break;
                }
                case error_handler_t::replace:
                {
                    // write buffer, but replace last byte
                    o->write_characters(string_buffer.data(), bytes - 1);
                    o->write_characters("\\ufffd", 6);
                    break;
                }
            }
        }
    }
    /*!