diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index c2a20ab7..bf069e83 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -115,38 +115,66 @@ class input_buffer_adapter : public input_adapter_protocol const char* const limit; }; -template -class wide_string_input_adapter : public input_adapter_protocol +template +struct wide_string_input_helper { - public: - explicit wide_string_input_adapter(const WideStringType& w) : str(w) {} - - std::char_traits::int_type get_character() noexcept override + // UTF-32 + static void fill_buffer(const WideStringType& str, size_t& current_wchar, std::array::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) { - // check if buffer needs to be filled - if (utf8_bytes_index == utf8_bytes_filled) + utf8_bytes_index = 0; + + if (current_wchar == str.size()) { - if (sizeof(typename WideStringType::value_type) == 2) + utf8_bytes[0] = std::char_traits::eof(); + utf8_bytes_filled = 1; + } + else + { + // get the current character + const int wc = static_cast(str[current_wchar++]); + + // UTF-32 to UTF-8 encoding + if (wc < 0x80) { - fill_buffer_utf16(); + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + else if (wc <= 0x7FF) + { + utf8_bytes[0] = 0xC0 | ((wc >> 6) & 0x1F); + utf8_bytes[1] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 2; + } + else if (wc <= 0xFFFF) + { + utf8_bytes[0] = 0xE0 | ((wc >> 12) & 0x0F); + utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[2] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 3; + } + else if (wc <= 0x10FFFF) + { + utf8_bytes[0] = 0xF0 | ((wc >> 18) & 0x07); + utf8_bytes[1] = 0x80 | ((wc >> 12) & 0x3F); + utf8_bytes[2] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[3] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 4; } else { - fill_buffer_utf32(); + // unknown character + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; } - - assert(utf8_bytes_filled > 0); - assert(utf8_bytes_index == 0); } - - // use buffer - assert(utf8_bytes_filled > 0); - assert(utf8_bytes_index < utf8_bytes_filled); - return utf8_bytes[utf8_bytes_index++]; } +}; - private: - void fill_buffer_utf16() +template +struct wide_string_input_helper +{ + // UTF-16 + static void fill_buffer(const WideStringType& str, size_t& current_wchar, std::array::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) { utf8_bytes_index = 0; @@ -201,58 +229,38 @@ class wide_string_input_adapter : public input_adapter_protocol } } } +}; - void fill_buffer_utf32() +template +class wide_string_input_adapter : public input_adapter_protocol +{ + public: + explicit wide_string_input_adapter(const WideStringType& w) : str(w) {} + + std::char_traits::int_type get_character() noexcept override { - utf8_bytes_index = 0; - - if (current_wchar == str.size()) + // check if buffer needs to be filled + if (utf8_bytes_index == utf8_bytes_filled) { - utf8_bytes[0] = std::char_traits::eof(); - utf8_bytes_filled = 1; - } - else - { - // get the current character - const int wc = static_cast(str[current_wchar++]); + fill_buffer(); - // UTF-32 to UTF-8 encoding - if (wc < 0x80) - { - utf8_bytes[0] = wc; - utf8_bytes_filled = 1; - } - else if (wc <= 0x7FF) - { - utf8_bytes[0] = 0xC0 | ((wc >> 6) & 0x1F); - utf8_bytes[1] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 2; - } - else if (wc <= 0xFFFF) - { - utf8_bytes[0] = 0xE0 | ((wc >> 12) & 0x0F); - utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[2] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 3; - } - else if (wc <= 0x10FFFF) - { - utf8_bytes[0] = 0xF0 | ((wc >> 18 ) & 0x07); - utf8_bytes[1] = 0x80 | ((wc >> 12) & 0x3F); - utf8_bytes[2] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[3] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 4; - } - else - { - // unknown character - utf8_bytes[0] = wc; - utf8_bytes_filled = 1; - } + assert(utf8_bytes_filled > 0); + assert(utf8_bytes_index == 0); } + + // use buffer + assert(utf8_bytes_filled > 0); + assert(utf8_bytes_index < utf8_bytes_filled); + return utf8_bytes[utf8_bytes_index++]; } private: + template + void fill_buffer() + { + wide_string_input_helper::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled); + } + /// the wstring to process const WideStringType& str; diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index c8b0cc9e..d276a673 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -1993,38 +1993,66 @@ class input_buffer_adapter : public input_adapter_protocol const char* const limit; }; -template -class wide_string_input_adapter : public input_adapter_protocol +template +struct wide_string_input_helper { - public: - explicit wide_string_input_adapter(const WideStringType& w) : str(w) {} - - std::char_traits::int_type get_character() noexcept override + // UTF-32 + static void fill_buffer(const WideStringType& str, size_t& current_wchar, std::array::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) { - // check if buffer needs to be filled - if (utf8_bytes_index == utf8_bytes_filled) + utf8_bytes_index = 0; + + if (current_wchar == str.size()) { - if (sizeof(typename WideStringType::value_type) == 2) + utf8_bytes[0] = std::char_traits::eof(); + utf8_bytes_filled = 1; + } + else + { + // get the current character + const int wc = static_cast(str[current_wchar++]); + + // UTF-32 to UTF-8 encoding + if (wc < 0x80) { - fill_buffer_utf16(); + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + else if (wc <= 0x7FF) + { + utf8_bytes[0] = 0xC0 | ((wc >> 6) & 0x1F); + utf8_bytes[1] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 2; + } + else if (wc <= 0xFFFF) + { + utf8_bytes[0] = 0xE0 | ((wc >> 12) & 0x0F); + utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[2] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 3; + } + else if (wc <= 0x10FFFF) + { + utf8_bytes[0] = 0xF0 | ((wc >> 18) & 0x07); + utf8_bytes[1] = 0x80 | ((wc >> 12) & 0x3F); + utf8_bytes[2] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[3] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 4; } else { - fill_buffer_utf32(); + // unknown character + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; } - - assert(utf8_bytes_filled > 0); - assert(utf8_bytes_index == 0); } - - // use buffer - assert(utf8_bytes_filled > 0); - assert(utf8_bytes_index < utf8_bytes_filled); - return utf8_bytes[utf8_bytes_index++]; } +}; - private: - void fill_buffer_utf16() +template +struct wide_string_input_helper +{ + // UTF-16 + static void fill_buffer(const WideStringType& str, size_t& current_wchar, std::array::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) { utf8_bytes_index = 0; @@ -2079,58 +2107,38 @@ class wide_string_input_adapter : public input_adapter_protocol } } } +}; - void fill_buffer_utf32() +template +class wide_string_input_adapter : public input_adapter_protocol +{ + public: + explicit wide_string_input_adapter(const WideStringType& w) : str(w) {} + + std::char_traits::int_type get_character() noexcept override { - utf8_bytes_index = 0; - - if (current_wchar == str.size()) + // check if buffer needs to be filled + if (utf8_bytes_index == utf8_bytes_filled) { - utf8_bytes[0] = std::char_traits::eof(); - utf8_bytes_filled = 1; - } - else - { - // get the current character - const int wc = static_cast(str[current_wchar++]); + fill_buffer(); - // UTF-32 to UTF-8 encoding - if (wc < 0x80) - { - utf8_bytes[0] = wc; - utf8_bytes_filled = 1; - } - else if (wc <= 0x7FF) - { - utf8_bytes[0] = 0xC0 | ((wc >> 6) & 0x1F); - utf8_bytes[1] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 2; - } - else if (wc <= 0xFFFF) - { - utf8_bytes[0] = 0xE0 | ((wc >> 12) & 0x0F); - utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[2] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 3; - } - else if (wc <= 0x10FFFF) - { - utf8_bytes[0] = 0xF0 | ((wc >> 18 ) & 0x07); - utf8_bytes[1] = 0x80 | ((wc >> 12) & 0x3F); - utf8_bytes[2] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[3] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 4; - } - else - { - // unknown character - utf8_bytes[0] = wc; - utf8_bytes_filled = 1; - } + assert(utf8_bytes_filled > 0); + assert(utf8_bytes_index == 0); } + + // use buffer + assert(utf8_bytes_filled > 0); + assert(utf8_bytes_index < utf8_bytes_filled); + return utf8_bytes[utf8_bytes_index++]; } private: + template + void fill_buffer() + { + wide_string_input_helper::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled); + } + /// the wstring to process const WideStringType& str; @@ -2146,6 +2154,8 @@ class wide_string_input_adapter : public input_adapter_protocol std::size_t utf8_bytes_filled = 0; }; + + class input_adapter { public: