🐛 fix for #656

A complete rewrite of the string escape function. It now provides codepoint-to-\uxxxx escaping. Invalid UTF-8 byte sequences are not escaped, but copied as-is. I haven’t spent much time optimizing the code - but the library now agrees with Python on every single Unicode character’s escaping (see file test/data/json_nlohmann_tests/all_unicode_ascii.json).

Other minor changes: replaced "size_t" by "std::size_t"
This commit is contained in:
Niels Lohmann 2017-07-17 07:53:02 +02:00
parent 8a9133c6b2
commit 21d23982ca
No known key found for this signature in database
GPG key ID: 7F3CEA63AE251B69
4 changed files with 1112330 additions and 131 deletions

File diff suppressed because it is too large Load diff

View file

@ -98,7 +98,9 @@ TEST_CASE("convenience functions")
check_escaped("\x1d", "\\u001d");
check_escaped("\x1e", "\\u001e");
check_escaped("\x1f", "\\u001f");
check_escaped("\xA9", "\xA9");
check_escaped("\xA9", "\\u00a9", true);
// invalid UTF-8 characters
check_escaped("ä\xA9ü", "ä\xA9ü");
check_escaped("ä\xA9ü", "\\u00e4\xA9\\u00fc", true);
}
}

View file

@ -28,6 +28,7 @@ SOFTWARE.
#include "catch.hpp"
#include <fstream>
#include "json.hpp"
using nlohmann::json;
@ -252,9 +253,35 @@ TEST_CASE("object inspection")
SECTION("dump with ensure_ascii and non-ASCII characters")
{
CHECK(json("ä").dump(-1, ' ', true) == R"("\u00c3\u00a4")");
CHECK(json("Ö").dump(-1, ' ', true) == R"("\u00c3\u0096")");
CHECK(json("❤️").dump(-1, ' ', true) == R"("\u00e2\u009d\u00a4\u00ef\u00b8\u008f")");
CHECK(json("ä").dump(-1, ' ', true) == "\"\\u00e4\"");
CHECK(json("Ö").dump(-1, ' ', true) == "\"\\u00d6\"");
CHECK(json("❤️").dump(-1, ' ', true) == "\"\\u2764\\ufe0f\"");
}
SECTION("full Unicode escaping to ASCII")
{
SECTION("parsing yields the same JSON value")
{
std::ifstream f_escaped("test/data/json_nlohmann_tests/all_unicode_ascii.json");
std::ifstream f_unescaped("test/data/json_nlohmann_tests/all_unicode.json");
json j1 = json::parse(f_escaped);
json j2 = json::parse(f_unescaped);
CHECK(j1 == j2);
}
SECTION("dumping yields the same JSON text")
{
std::ifstream f_escaped("test/data/json_nlohmann_tests/all_unicode_ascii.json");
std::ifstream f_unescaped("test/data/json_nlohmann_tests/all_unicode.json");
json value = json::parse(f_unescaped);
std::string text = value.dump(4, ' ', true);
std::string expected((std::istreambuf_iterator<char>(f_escaped)),
std::istreambuf_iterator<char>());
CHECK(text == expected);
}
}
SECTION("serialization of discarded element")