diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index f8155faa..51d2a373 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -142,122 +142,9 @@ class wide_string_input_adapter : public input_adapter_protocol template void fill_buffer() { - fill_buffer_utf32(); - } - - template<> - void fill_buffer<2>() - { - fill_buffer_utf16(); + wide_string_input_helper::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled); } - void fill_buffer_utf16() - { - utf8_bytes_index = 0; - - if (current_wchar == str.size()) - { - utf8_bytes[0] = std::char_traits::eof(); - utf8_bytes_filled = 1; - } - else - { - // get the current character - const int wc = static_cast(str[current_wchar++]); - - // UTF-16 to UTF-8 encoding - if (wc < 0x80) - { - utf8_bytes[0] = wc; - utf8_bytes_filled = 1; - } - else if (wc <= 0x7FF) - { - utf8_bytes[0] = 0xC0 | ((wc >> 6)); - utf8_bytes[1] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 2; - } - else if (0xD800 > wc or wc >= 0xE000) - { - utf8_bytes[0] = 0xE0 | ((wc >> 12)); - utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[2] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 3; - } - else - { - if (current_wchar < str.size()) - { - const int wc2 = static_cast(str[current_wchar++]); - const int charcode = 0x10000 + (((wc & 0x3FF) << 10) | (wc2 & 0x3FF)); - utf8_bytes[0] = 0xf0 | (charcode >> 18); - utf8_bytes[1] = 0x80 | ((charcode >> 12) & 0x3F); - utf8_bytes[2] = 0x80 | ((charcode >> 6) & 0x3F); - utf8_bytes[3] = 0x80 | (charcode & 0x3F); - utf8_bytes_filled = 4; - } - else - { - // unknown character - ++current_wchar; - utf8_bytes[0] = wc; - utf8_bytes_filled = 1; - } - } - } - } - - void fill_buffer_utf32() - { - utf8_bytes_index = 0; - - if (current_wchar == str.size()) - { - utf8_bytes[0] = std::char_traits::eof(); - utf8_bytes_filled = 1; - } - else - { - // get the current character - const int wc = static_cast(str[current_wchar++]); - - // UTF-32 to UTF-8 encoding - if (wc < 0x80) - { - utf8_bytes[0] = wc; - utf8_bytes_filled = 1; - } - else if (wc <= 0x7FF) - { - utf8_bytes[0] = 0xC0 | ((wc >> 6) & 0x1F); - utf8_bytes[1] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 2; - } - else if (wc <= 0xFFFF) - { - utf8_bytes[0] = 0xE0 | ((wc >> 12) & 0x0F); - utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[2] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 3; - } - else if (wc <= 0x10FFFF) - { - utf8_bytes[0] = 0xF0 | ((wc >> 18 ) & 0x07); - utf8_bytes[1] = 0x80 | ((wc >> 12) & 0x3F); - utf8_bytes[2] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[3] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 4; - } - else - { - // unknown character - utf8_bytes[0] = wc; - utf8_bytes_filled = 1; - } - } - } - - private: /// the wstring to process const WideStringType& str; @@ -273,6 +160,125 @@ class wide_string_input_adapter : public input_adapter_protocol std::size_t utf8_bytes_filled = 0; }; +namespace +{ + template + struct wide_string_input_helper + { + // UTF-32 + static void fill_buffer(const WideStringType& str, size_t& current_wchar, std::array::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) + { + utf8_bytes_index = 0; + + if (current_wchar == str.size()) + { + utf8_bytes[0] = std::char_traits::eof(); + utf8_bytes_filled = 1; + } + else + { + // get the current character + const int wc = static_cast(str[current_wchar++]); + + // UTF-32 to UTF-8 encoding + if (wc < 0x80) + { + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + else if (wc <= 0x7FF) + { + utf8_bytes[0] = 0xC0 | ((wc >> 6) & 0x1F); + utf8_bytes[1] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 2; + } + else if (wc <= 0xFFFF) + { + utf8_bytes[0] = 0xE0 | ((wc >> 12) & 0x0F); + utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[2] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 3; + } + else if (wc <= 0x10FFFF) + { + utf8_bytes[0] = 0xF0 | ((wc >> 18) & 0x07); + utf8_bytes[1] = 0x80 | ((wc >> 12) & 0x3F); + utf8_bytes[2] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[3] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 4; + } + else + { + // unknown character + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + } + } + }; + + template + struct wide_string_input_helper + { + // UTF-16 + static void fill_buffer(const WideStringType& str, size_t& current_wchar, std::array::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) + { + utf8_bytes_index = 0; + + if (current_wchar == str.size()) + { + utf8_bytes[0] = std::char_traits::eof(); + utf8_bytes_filled = 1; + } + else + { + // get the current character + const int wc = static_cast(str[current_wchar++]); + + // UTF-16 to UTF-8 encoding + if (wc < 0x80) + { + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + else if (wc <= 0x7FF) + { + utf8_bytes[0] = 0xC0 | ((wc >> 6)); + utf8_bytes[1] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 2; + } + else if (0xD800 > wc or wc >= 0xE000) + { + utf8_bytes[0] = 0xE0 | ((wc >> 12)); + utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[2] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 3; + } + else + { + if (current_wchar < str.size()) + { + const int wc2 = static_cast(str[current_wchar++]); + const int charcode = 0x10000 + (((wc & 0x3FF) << 10) | (wc2 & 0x3FF)); + utf8_bytes[0] = 0xf0 | (charcode >> 18); + utf8_bytes[1] = 0x80 | ((charcode >> 12) & 0x3F); + utf8_bytes[2] = 0x80 | ((charcode >> 6) & 0x3F); + utf8_bytes[3] = 0x80 | (charcode & 0x3F); + utf8_bytes_filled = 4; + } + else + { + // unknown character + ++current_wchar; + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + } + } + } + }; +} + class input_adapter { public: diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index c19ee6c9..c9132fa2 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -2020,122 +2020,9 @@ class wide_string_input_adapter : public input_adapter_protocol template void fill_buffer() { - fill_buffer_utf32(); - } - - template<> - void fill_buffer<2>() - { - fill_buffer_utf16(); + wide_string_input_helper::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled); } - void fill_buffer_utf16() - { - utf8_bytes_index = 0; - - if (current_wchar == str.size()) - { - utf8_bytes[0] = std::char_traits::eof(); - utf8_bytes_filled = 1; - } - else - { - // get the current character - const int wc = static_cast(str[current_wchar++]); - - // UTF-16 to UTF-8 encoding - if (wc < 0x80) - { - utf8_bytes[0] = wc; - utf8_bytes_filled = 1; - } - else if (wc <= 0x7FF) - { - utf8_bytes[0] = 0xC0 | ((wc >> 6)); - utf8_bytes[1] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 2; - } - else if (0xD800 > wc or wc >= 0xE000) - { - utf8_bytes[0] = 0xE0 | ((wc >> 12)); - utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[2] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 3; - } - else - { - if (current_wchar < str.size()) - { - const int wc2 = static_cast(str[current_wchar++]); - const int charcode = 0x10000 + (((wc & 0x3FF) << 10) | (wc2 & 0x3FF)); - utf8_bytes[0] = 0xf0 | (charcode >> 18); - utf8_bytes[1] = 0x80 | ((charcode >> 12) & 0x3F); - utf8_bytes[2] = 0x80 | ((charcode >> 6) & 0x3F); - utf8_bytes[3] = 0x80 | (charcode & 0x3F); - utf8_bytes_filled = 4; - } - else - { - // unknown character - ++current_wchar; - utf8_bytes[0] = wc; - utf8_bytes_filled = 1; - } - } - } - } - - void fill_buffer_utf32() - { - utf8_bytes_index = 0; - - if (current_wchar == str.size()) - { - utf8_bytes[0] = std::char_traits::eof(); - utf8_bytes_filled = 1; - } - else - { - // get the current character - const int wc = static_cast(str[current_wchar++]); - - // UTF-32 to UTF-8 encoding - if (wc < 0x80) - { - utf8_bytes[0] = wc; - utf8_bytes_filled = 1; - } - else if (wc <= 0x7FF) - { - utf8_bytes[0] = 0xC0 | ((wc >> 6) & 0x1F); - utf8_bytes[1] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 2; - } - else if (wc <= 0xFFFF) - { - utf8_bytes[0] = 0xE0 | ((wc >> 12) & 0x0F); - utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[2] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 3; - } - else if (wc <= 0x10FFFF) - { - utf8_bytes[0] = 0xF0 | ((wc >> 18 ) & 0x07); - utf8_bytes[1] = 0x80 | ((wc >> 12) & 0x3F); - utf8_bytes[2] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[3] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 4; - } - else - { - // unknown character - utf8_bytes[0] = wc; - utf8_bytes_filled = 1; - } - } - } - - private: /// the wstring to process const WideStringType& str; @@ -2151,6 +2038,125 @@ class wide_string_input_adapter : public input_adapter_protocol std::size_t utf8_bytes_filled = 0; }; +namespace +{ + template + struct wide_string_input_helper + { + // UTF-32 + static void fill_buffer(const WideStringType& str, size_t& current_wchar, std::array::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) + { + utf8_bytes_index = 0; + + if (current_wchar == str.size()) + { + utf8_bytes[0] = std::char_traits::eof(); + utf8_bytes_filled = 1; + } + else + { + // get the current character + const int wc = static_cast(str[current_wchar++]); + + // UTF-32 to UTF-8 encoding + if (wc < 0x80) + { + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + else if (wc <= 0x7FF) + { + utf8_bytes[0] = 0xC0 | ((wc >> 6) & 0x1F); + utf8_bytes[1] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 2; + } + else if (wc <= 0xFFFF) + { + utf8_bytes[0] = 0xE0 | ((wc >> 12) & 0x0F); + utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[2] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 3; + } + else if (wc <= 0x10FFFF) + { + utf8_bytes[0] = 0xF0 | ((wc >> 18) & 0x07); + utf8_bytes[1] = 0x80 | ((wc >> 12) & 0x3F); + utf8_bytes[2] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[3] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 4; + } + else + { + // unknown character + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + } + } + }; + + template + struct wide_string_input_helper + { + // UTF-16 + static void fill_buffer(const WideStringType& str, size_t& current_wchar, std::array::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) + { + utf8_bytes_index = 0; + + if (current_wchar == str.size()) + { + utf8_bytes[0] = std::char_traits::eof(); + utf8_bytes_filled = 1; + } + else + { + // get the current character + const int wc = static_cast(str[current_wchar++]); + + // UTF-16 to UTF-8 encoding + if (wc < 0x80) + { + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + else if (wc <= 0x7FF) + { + utf8_bytes[0] = 0xC0 | ((wc >> 6)); + utf8_bytes[1] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 2; + } + else if (0xD800 > wc or wc >= 0xE000) + { + utf8_bytes[0] = 0xE0 | ((wc >> 12)); + utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[2] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 3; + } + else + { + if (current_wchar < str.size()) + { + const int wc2 = static_cast(str[current_wchar++]); + const int charcode = 0x10000 + (((wc & 0x3FF) << 10) | (wc2 & 0x3FF)); + utf8_bytes[0] = 0xf0 | (charcode >> 18); + utf8_bytes[1] = 0x80 | ((charcode >> 12) & 0x3F); + utf8_bytes[2] = 0x80 | ((charcode >> 6) & 0x3F); + utf8_bytes[3] = 0x80 | (charcode & 0x3F); + utf8_bytes_filled = 4; + } + else + { + // unknown character + ++current_wchar; + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + } + } + } + }; +} + class input_adapter { public: