From 71597be294f2b9b9f9b3fc9dc2017e414c4740f2 Mon Sep 17 00:00:00 2001
From: Ryan Mulder <ryanjmulder@gmail.com>
Date: Tue, 11 Jul 2017 13:41:56 -0400
Subject: [PATCH 1/3] add ensure_ascii parameter to dump. #330

---
 src/json.hpp                  | 105 +++++++++++++++++++++-------------
 test/src/unit-convenience.cpp |   7 ++-
 test/src/unit-inspection.cpp  |   7 +++
 3 files changed, 77 insertions(+), 42 deletions(-)

diff --git a/src/json.hpp b/src/json.hpp
index 39db611c..406b6fe0 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -6423,6 +6423,7 @@ class serializer
     @param[in] current_indent  the current indent level (only used internally)
     */
     void dump(const BasicJsonType& val, const bool pretty_print,
+              const bool ensure_ascii,
               const unsigned int indent_step,
               const unsigned int current_indent = 0)
     {
@@ -6453,9 +6454,9 @@ class serializer
                     {
                         o->write_characters(indent_string.c_str(), new_indent);
                         o->write_character('\"');
-                        dump_escaped(i->first);
+                        dump_escaped(i->first, ensure_ascii);
                         o->write_characters("\": ", 3);
-                        dump(i->second, true, indent_step, new_indent);
+                        dump(i->second, true, ensure_ascii, indent_step, new_indent);
                         o->write_characters(",\n", 2);
                     }
 
@@ -6463,9 +6464,9 @@ class serializer
                     assert(i != val.m_value.object->cend());
                     o->write_characters(indent_string.c_str(), new_indent);
                     o->write_character('\"');
-                    dump_escaped(i->first);
+                    dump_escaped(i->first, ensure_ascii);
                     o->write_characters("\": ", 3);
-                    dump(i->second, true, indent_step, new_indent);
+                    dump(i->second, true, ensure_ascii, indent_step, new_indent);
 
                     o->write_character('\n');
                     o->write_characters(indent_string.c_str(), current_indent);
@@ -6480,18 +6481,18 @@ class serializer
                     for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i)
                     {
                         o->write_character('\"');
-                        dump_escaped(i->first);
+                        dump_escaped(i->first, ensure_ascii);
                         o->write_characters("\":", 2);
-                        dump(i->second, false, indent_step, current_indent);
+                        dump(i->second, false, ensure_ascii, indent_step, current_indent);
                         o->write_character(',');
                     }
 
                     // last element
                     assert(i != val.m_value.object->cend());
                     o->write_character('\"');
-                    dump_escaped(i->first);
+                    dump_escaped(i->first, ensure_ascii);
                     o->write_characters("\":", 2);
-                    dump(i->second, false, indent_step, current_indent);
+                    dump(i->second, false, ensure_ascii, indent_step, current_indent);
 
                     o->write_character('}');
                 }
@@ -6523,14 +6524,14 @@ class serializer
                             i != val.m_value.array->cend() - 1; ++i)
                     {
                         o->write_characters(indent_string.c_str(), new_indent);
-                        dump(*i, true, indent_step, new_indent);
+                        dump(*i, true, ensure_ascii, indent_step, new_indent);
                         o->write_characters(",\n", 2);
                     }
 
                     // last element
                     assert(not val.m_value.array->empty());
                     o->write_characters(indent_string.c_str(), new_indent);
-                    dump(val.m_value.array->back(), true, indent_step, new_indent);
+                    dump(val.m_value.array->back(), true, ensure_ascii, indent_step, new_indent);
 
                     o->write_character('\n');
                     o->write_characters(indent_string.c_str(), current_indent);
@@ -6544,13 +6545,13 @@ class serializer
                     for (auto i = val.m_value.array->cbegin();
                             i != val.m_value.array->cend() - 1; ++i)
                     {
-                        dump(*i, false, indent_step, current_indent);
+                        dump(*i, false, ensure_ascii, indent_step, current_indent);
                         o->write_character(',');
                     }
 
                     // last element
                     assert(not val.m_value.array->empty());
-                    dump(val.m_value.array->back(), false, indent_step, current_indent);
+                    dump(val.m_value.array->back(), false, ensure_ascii, indent_step, current_indent);
 
                     o->write_character(']');
                 }
@@ -6561,7 +6562,7 @@ class serializer
             case value_t::string:
             {
                 o->write_character('\"');
-                dump_escaped(*val.m_value.string);
+                dump_escaped(*val.m_value.string, ensure_ascii);
                 o->write_character('\"');
                 return;
             }
@@ -6616,14 +6617,15 @@ class serializer
     @brief calculates the extra space to escape a JSON string
 
     @param[in] s  the string to escape
+    @param[in] ensure_ascii  whether to count every byte with the high bit set (non-ASCII) as escaped to a 6-byte \u00xx sequence
     @return the number of characters required to escape string @a s
 
     @complexity Linear in the length of string @a s.
     */
-    static std::size_t extra_space(const string_t& s) noexcept
+    static std::size_t extra_space(const string_t& s, const bool ensure_ascii) noexcept
     {
         return std::accumulate(s.begin(), s.end(), size_t{},
-                               [](size_t res, typename string_t::value_type c)
+                               [ensure_ascii](size_t res, typename string_t::value_type c)
         {
             switch (c)
             {
@@ -6673,6 +6675,11 @@ class serializer
 
                 default:
                 {
+                    if (c & 0x80 and ensure_ascii)
+                    {
+                        // from c (1 byte) to \uxxxx (6 bytes)
+                        return res + 5;
+                    }
                     return res;
                 }
             }
@@ -6688,12 +6695,13 @@ class serializer
     representation. The escaped string is written to output stream @a o.
 
     @param[in] s  the string to escape
+    @param[in] ensure_ascii  whether to escape every byte with the high bit set (non-ASCII) as a \u00xx sequence
 
     @complexity Linear in the length of string @a s.
     */
-    void dump_escaped(const string_t& s) const
+    void dump_escaped(const string_t& s, const bool ensure_ascii) const
     {
-        const auto space = extra_space(s);
+        const auto space = extra_space(s, ensure_ascii);
         if (space == 0)
         {
             o->write_characters(s.c_str(), s.size());
@@ -6704,6 +6712,27 @@ class serializer
         string_t result(s.size() + space, '\\');
         std::size_t pos = 0;
 
+        auto escape_character = [&result, &pos](const typename string_t::value_type c)
+        {
+            // convert a number 0..15 to its hex representation
+            // (0..f)
+            static const char hexify[16] =
+            {
+                '0', '1', '2', '3', '4', '5', '6', '7',
+                '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
+            };
+
+            // print character c as \uxxxx
+            for (const char m :
+        { 'u', '0', '0', hexify[(c >> 4) & 0x0f], hexify[c & 0x0f]
+            })
+            {
+                result[++pos] = m;
+            }
+
+            ++pos;
+        };
+
         for (const auto& c : s)
         {
             switch (c)
@@ -6792,28 +6821,21 @@ class serializer
                 case 0x1e:
                 case 0x1f:
                 {
-                    // convert a number 0..15 to its hex representation
-                    // (0..f)
-                    static const char hexify[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
-                                                    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
-                                                   };
-
-                    // print character c as \uxxxx
-                    for (const char m :
-                {'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f]
-                    })
-                    {
-                        result[++pos] = m;
-                    }
-
-                    ++pos;
+                    escape_character(c);
                     break;
                 }
 
                 default:
                 {
-                    // all other characters are added as-is
-                    result[pos++] = c;
+                    if (c & 0x80 and ensure_ascii)
+                    {
+                        escape_character(c);
+                    }
+                    else
+                    {
+                        // all other characters are added as-is
+                        result[pos++] = c;
+                    }
                     break;
                 }
             }
@@ -9017,7 +9039,7 @@ class basic_json
 
     Serialization function for JSON values. The function tries to mimic
     Python's `json.dumps()` function, and currently supports its @a indent
-    parameter.
+    and @a ensure_ascii parameters.
 
     @param[in] indent If indent is nonnegative, then array elements and object
     members will be pretty-printed with that indent level. An indent level of
@@ -9025,30 +9047,33 @@ class basic_json
     representation.
     @param[in] indent_char The character to use for indentation if @a indent is
     greater than `0`. The default is ` ` (space).
+    @param[in] ensure_ascii If ensure_ascii is true (the default), all non-ASCII
+    characters in the output are escaped with \uXXXX sequences, and the result
+    consists of ASCII characters only.
 
     @return string containing the serialization of the JSON value
 
     @complexity Linear.
 
     @liveexample{The following example shows the effect of different @a indent
-    parameters to the result of the serialization.,dump}
+    parameters to the result of the serialization.dump}
 
     @see https://docs.python.org/2/library/json.html#json.dump
 
     @since version 1.0.0; indentation character added in version 3.0.0
     */
-    string_t dump(const int indent = -1, const char indent_char = ' ') const
+    string_t dump(const int indent = -1, const char indent_char = ' ', const bool ensure_ascii = false) const
     {
         string_t result;
         serializer s(detail::output_adapter_factory<char>::create(result), indent_char);
 
         if (indent >= 0)
         {
-            s.dump(*this, true, static_cast<unsigned int>(indent));
+            s.dump(*this, true, ensure_ascii, static_cast<unsigned int>(indent));
         }
         else
         {
-            s.dump(*this, false, 0);
+            s.dump(*this, false, ensure_ascii, 0);
         }
 
         return result;
@@ -12715,7 +12740,7 @@ class basic_json
 
         // do the actual serialization
         serializer s(detail::output_adapter_factory<char>::create(o), o.fill());
-        s.dump(j, pretty_print, static_cast<unsigned int>(indentation));
+        s.dump(j, pretty_print, false, static_cast<unsigned int>(indentation));
         return o;
     }
 
diff --git a/test/src/unit-convenience.cpp b/test/src/unit-convenience.cpp
index 52746028..6aee1f89 100644
--- a/test/src/unit-convenience.cpp
+++ b/test/src/unit-convenience.cpp
@@ -50,11 +50,12 @@ TEST_CASE("convenience functions")
     SECTION("string escape")
     {
         const auto check_escaped = [](const char* original,
-                                      const char* escaped)
+                                      const char* escaped,
+                                      const bool ensure_ascii = false)
         {
             std::stringstream ss;
             json::serializer s(nlohmann::detail::output_adapter_factory<char>::create(ss), ' ');
-            s.dump_escaped(original);
+            s.dump_escaped(original, ensure_ascii);
             CHECK(ss.str() == escaped);
         };
 
@@ -97,5 +98,7 @@ TEST_CASE("convenience functions")
         check_escaped("\x1d", "\\u001d");
         check_escaped("\x1e", "\\u001e");
         check_escaped("\x1f", "\\u001f");
+        check_escaped("\xA9", "\xA9");
+        check_escaped("\xA9", "\\u00a9", true);
     }
 }
diff --git a/test/src/unit-inspection.cpp b/test/src/unit-inspection.cpp
index 8b294bb8..54f01628 100644
--- a/test/src/unit-inspection.cpp
+++ b/test/src/unit-inspection.cpp
@@ -250,6 +250,13 @@ TEST_CASE("object inspection")
             CHECK(json("❤️").dump() == "\"❤️\"");
         }
 
+        SECTION("dump with ensure_ascii and non-ASCII characters")
+        {
+            CHECK(json("ä").dump(-1, ' ', true) == R"("\u00c3\u00a4")");
+            CHECK(json("Ö").dump(-1, ' ', true) == R"("\u00c3\u0096")");
+            CHECK(json("❤️").dump(-1, ' ', true) == R"("\u00e2\u009d\u00a4\u00ef\u00b8\u008f")");
+        }
+
         SECTION("serialization of discarded element")
         {
             json j_discarded(json::value_t::discarded);

From 40461c6c55721ac7418bb29c7502da373c3f88ef Mon Sep 17 00:00:00 2001
From: Ryan Mulder <ryanjmulder@gmail.com>
Date: Tue, 11 Jul 2017 13:59:07 -0400
Subject: [PATCH 2/3] update comment on dump to not indicate that ensure_ascii
 is true by default

---
 src/json.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/json.hpp b/src/json.hpp
index 406b6fe0..605b2aad 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -9047,9 +9047,9 @@ class basic_json
     representation.
     @param[in] indent_char The character to use for indentation if @a indent is
     greater than `0`. The default is ` ` (space).
-    @param[in] ensure_ascii If ensure_ascii is true (the default), all non-ASCII
-    characters in the output are escaped with \uXXXX sequences, and the result
-    consists of ASCII characters only.
+    @param[in] ensure_ascii If ensure_ascii is true, all non-ASCII characters
+    in the output are escaped with \uXXXX sequences, and the result consists
+    of ASCII characters only.
 
     @return string containing the serialization of the JSON value
 

From 486f3a2d167f040f4d3f0d74a06e4fbc1021c23d Mon Sep 17 00:00:00 2001
From: Ryan Mulder <ryanjmulder@gmail.com>
Date: Tue, 11 Jul 2017 14:18:02 -0400
Subject: [PATCH 3/3] restore necessary comma in documentation of dump

---
 src/json.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/json.hpp b/src/json.hpp
index 605b2aad..0c5ef484 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -9056,7 +9056,7 @@ class basic_json
     @complexity Linear.
 
     @liveexample{The following example shows the effect of different @a indent
-    parameters to the result of the serialization.dump}
+    parameters to the result of the serialization.,dump}
 
     @see https://docs.python.org/2/library/json.html#json.dump