From 476507031890fd6effb7b447a156168803b0bd37 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 4 Jan 2017 18:07:46 +0100 Subject: [PATCH] :memo: added documentation wrt. UTF-8 strings #406 --- README.md | 1 + src/json.hpp | 9 +++++++-- src/json.hpp.re2c | 6 ++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 498d1bbe..071d847a 100644 --- a/README.md +++ b/README.md @@ -600,6 +600,7 @@ Thanks a lot for helping out! - Other encodings such as Latin-1, UTF-16, or UTF-32 are not supported and will yield parse errors. - [Unicode noncharacters](http://www.unicode.org/faq/private_use.html#nonchar1) will not be replaced by the library. - Invalid surrogates (e.g., incomplete pairs such as `\uDEAD`) will yield parse errors. + - The strings stored in the library are UTF-8 encoded. When using the default string type (`std::string`), note that its length/size functions return the number of stored bytes rather than the number of characters or glyphs. ## Execute unit tests diff --git a/src/json.hpp b/src/json.hpp index 4aa293d2..6b1dc663 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -450,6 +450,12 @@ class basic_json std::string @endcode + #### Encoding + + Strings are stored in UTF-8 encoding. Therefore, functions like + `std::string::size()` or `std::string::length()` return the number of + bytes in the string rather than the number of characters or glyphs. + #### String comparison [RFC 7159](http://rfc7159.net/rfc7159) states: @@ -7515,7 +7521,6 @@ class basic_json case 0xf9: // Half-Precision Float (two-byte IEEE 754) { - check_length(v.size(), 2, 1); idx += 2; // skip two content bytes // code from RFC 7049, Appendix D, Figure 3: @@ -7525,7 +7530,7 @@ class basic_json // include at least decoding support for them even without such // support. An example of a small decoder for half-precision // floating-point numbers in the C language is shown in Fig. 3. - const int half = (v[current_idx + 1] << 8) + v[current_idx + 2]; + const int half = (v.at(current_idx + 1) << 8) + v.at(current_idx + 2); const int exp = (half >> 10) & 0x1f; const int mant = half & 0x3ff; double val; diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index 73c4131a..0eed0e6d 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -450,6 +450,12 @@ class basic_json std::string @endcode + #### Encoding + + Strings are stored in UTF-8 encoding. Therefore, functions like + `std::string::size()` or `std::string::length()` return the number of + bytes in the string rather than the number of characters or glyphs. + #### String comparison [RFC 7159](http://rfc7159.net/rfc7159) states: