From 5684d9a498349ab76a02f98140932762d1bc029a Mon Sep 17 00:00:00 2001 From: Francois Chabot Date: Wed, 27 May 2020 12:40:04 -0400 Subject: [PATCH] unified input API --- .../nlohmann/detail/input/input_adapters.hpp | 249 ++++++++---------- include/nlohmann/json.hpp | 157 ++++------- single_include/nlohmann/json.hpp | 245 ++++++++--------- test/CMakeLists.txt | 1 + test/Makefile | 1 + test/src/unit-user_defined_input.cpp | 115 ++++++++ 6 files changed, 402 insertions(+), 366 deletions(-) create mode 100644 test/src/unit-user_defined_input.cpp diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index 7ad26d00..9d868202 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -34,6 +34,8 @@ Input adapter for stdio file access. This adapter read only 1 byte and do not us class file_input_adapter { public: + using char_type = char; + JSON_HEDLEY_NON_NULL(2) explicit file_input_adapter(std::FILE* f) noexcept : m_file(f) @@ -68,6 +70,8 @@ subsequent call for input from the std::istream. class input_stream_adapter { public: + using char_type = char; + ~input_stream_adapter() { // clear stream flags; we use underlying streambuf I/O, do not @@ -113,51 +117,61 @@ class input_stream_adapter std::streambuf* sb = nullptr; }; -/// input adapter for buffer input -class input_buffer_adapter +// General-purpose iterator-based adapter. It might not be as fast as +// theoretically possible for some containers, but it is extremely versatile. +template +class iterator_input_adapter { public: - input_buffer_adapter(const char* b, const std::size_t l) noexcept - : cursor(b), limit(b == nullptr ? nullptr : (b + l)) - {} + using char_type = typename std::iterator_traits::value_type; - // delete because of pointer members - input_buffer_adapter(const input_buffer_adapter&) = delete; - input_buffer_adapter& operator=(input_buffer_adapter&) = delete; - input_buffer_adapter(input_buffer_adapter&&) = default; - input_buffer_adapter& operator=(input_buffer_adapter&&) = delete; + iterator_input_adapter(IteratorType first, IteratorType last) + : current(std::move(first)), end(std::move(last)) {} - std::char_traits::int_type get_character() noexcept + typename std::char_traits::int_type get_character() { - if (JSON_HEDLEY_LIKELY(cursor < limit)) + if (current != end) { - assert(cursor != nullptr and limit != nullptr); - return std::char_traits::to_int_type(*(cursor++)); + auto result = std::char_traits::to_int_type(*current); + std::advance(current, 1); + return result; + } + else + { + return std::char_traits::eof(); } - - return std::char_traits::eof(); } private: - /// pointer to the current character - const char* cursor; - /// pointer past the last character - const char* const limit; + IteratorType current; + IteratorType end; + + template + friend class wide_string_input_helper; + + bool empty() const + { + return current == end; + } + }; -template -struct wide_string_input_helper + +template +struct wide_string_input_helper; + +template +struct wide_string_input_helper { // UTF-32 - static void fill_buffer(const WideStringType& str, - size_t& current_wchar, + static void fill_buffer(BaseInputAdapter& input, std::array::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) { utf8_bytes_index = 0; - if (current_wchar == str.size()) + if (input.empty()) { utf8_bytes[0] = std::char_traits::eof(); utf8_bytes_filled = 1; @@ -165,7 +179,7 @@ struct wide_string_input_helper else { // get the current character - const auto wc = static_cast(str[current_wchar++]); + const auto wc = input.get_character(); // UTF-32 to UTF-8 encoding if (wc < 0x80) @@ -204,19 +218,18 @@ struct wide_string_input_helper } }; -template -struct wide_string_input_helper +template +struct wide_string_input_helper { // UTF-16 - static void fill_buffer(const WideStringType& str, - size_t& current_wchar, + static void fill_buffer(BaseInputAdapter& input, std::array::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) { utf8_bytes_index = 0; - if (current_wchar == str.size()) + if (input.empty()) { utf8_bytes[0] = std::char_traits::eof(); utf8_bytes_filled = 1; @@ -224,7 +237,7 @@ struct wide_string_input_helper else { // get the current character - const auto wc = static_cast(str[current_wchar++]); + const auto wc = input.get_character(); // UTF-16 to UTF-8 encoding if (wc < 0x80) @@ -247,9 +260,9 @@ struct wide_string_input_helper } else { - if (current_wchar < str.size()) + if (!input.empty()) { - const auto wc2 = static_cast(str[current_wchar++]); + const auto wc2 = static_cast(input.get_character()); const auto charcode = 0x10000u + (((wc & 0x3FFu) << 10u) | (wc2 & 0x3FFu)); utf8_bytes[0] = static_cast::int_type>(0xF0u | (charcode >> 18u)); utf8_bytes[1] = static_cast::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu)); @@ -259,8 +272,6 @@ struct wide_string_input_helper } else { - // unknown character - ++current_wchar; utf8_bytes[0] = static_cast::int_type>(wc); utf8_bytes_filled = 1; } @@ -269,20 +280,20 @@ struct wide_string_input_helper } }; -template +// Wraps another input apdater to convert wide character types into individual bytes. +template class wide_string_input_adapter { public: - explicit wide_string_input_adapter(const WideStringType& w) noexcept - : str(w) - {} + wide_string_input_adapter(BaseInputAdapter base) + : base_adapter(base) {} - std::char_traits::int_type get_character() noexcept + typename std::char_traits::int_type get_character() noexcept { // check if buffer needs to be filled if (utf8_bytes_index == utf8_bytes_filled) { - fill_buffer(); + fill_buffer(); assert(utf8_bytes_filled > 0); assert(utf8_bytes_index == 0); @@ -295,18 +306,14 @@ class wide_string_input_adapter } private: + BaseInputAdapter base_adapter; + template void fill_buffer() { - wide_string_input_helper::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled); + wide_string_input_helper::fill_buffer(base_adapter, utf8_bytes, utf8_bytes_index, utf8_bytes_filled); } - /// the wstring to process - const WideStringType& str; - - /// index of the current wchar in str - std::size_t current_wchar = 0; - /// a buffer for UTF-8 bytes std::array::int_type, 4> utf8_bytes = {{0, 0, 0, 0}}; @@ -316,6 +323,52 @@ class wide_string_input_adapter std::size_t utf8_bytes_filled = 0; }; + +template +struct iterator_input_adapter_factory +{ + using iterator_type = IteratorType; + using char_type = typename std::iterator_traits::value_type; + using adapter_type = iterator_input_adapter; + + static adapter_type create(IteratorType begin, IteratorType end) + { + return adapter_type(std::move(begin), std::move(end)); + } +}; + +template +struct iterator_input_adapter_factory::value_type)>1)>::type > + { + + using iterator_type = IteratorType; + using char_type = typename std::iterator_traits::value_type; + using base_adapter_type = iterator_input_adapter; + using adapter_type = wide_string_input_adapter; + + static adapter_type create(IteratorType begin, IteratorType end) +{ + return adapter_type(base_adapter_type(std::move(begin), std::move(end))); +} + }; + +// General purpose iterator-based input +template +typename iterator_input_adapter_factory::adapter_type input_adapter(IteratorType begin, IteratorType end) +{ + using factory_type = iterator_input_adapter_factory; + return factory_type::create(begin, end); +} + +// Convenience shorthand from container to iterator +template +auto input_adapter(const ContainerType& container) -> decltype(input_adapter(begin(container), end(container))) +{ + return input_adapter(begin(container), end(container)); +} + +// Special cases with fast paths inline file_input_adapter input_adapter(std::FILE* file) { return file_input_adapter(file); @@ -331,97 +384,22 @@ inline input_stream_adapter input_adapter(std::istream&& stream) return input_stream_adapter(stream); } +using contiguous_bytes_input_adapter = decltype(input_adapter(std::declval(), std::declval())); + +// Null-delimited strings, and the like. template::value and std::is_integral::type>::value and sizeof(typename std::remove_pointer::type) == 1, int>::type = 0> -input_buffer_adapter input_adapter(CharT b, std::size_t l) +contiguous_bytes_input_adapter input_adapter(CharT b) { - return input_buffer_adapter(reinterpret_cast(b), l); + auto length = std::strlen(reinterpret_cast(b)); + auto ptr = reinterpret_cast(b); + return input_adapter(ptr, ptr + length); } -template::value and - std::is_integral::type>::value and - sizeof(typename std::remove_pointer::type) == 1, - int>::type = 0> -input_buffer_adapter input_adapter(CharT b) -{ - return input_adapter(reinterpret_cast(b), - std::strlen(reinterpret_cast(b))); -} - -template::iterator_category, std::random_access_iterator_tag>::value, - int>::type = 0> -input_buffer_adapter input_adapter(IteratorType first, IteratorType last) -{ -#ifndef NDEBUG - // assertion to check that the iterator range is indeed contiguous, - // see https://stackoverflow.com/a/35008842/266378 for more discussion - const auto is_contiguous = std::accumulate( - first, last, std::pair(true, 0), - [&first](std::pair res, decltype(*first) val) - { - res.first &= (val == *(std::next(std::addressof(*first), res.second++))); - return res; - }).first; - assert(is_contiguous); -#endif - - // assertion to check that each element is 1 byte long - static_assert( - sizeof(typename iterator_traits::value_type) == 1, - "each element in the iterator range must have the size of 1 byte"); - - const auto len = static_cast(std::distance(first, last)); - if (JSON_HEDLEY_LIKELY(len > 0)) - { - // there is at least one element: use the address of first - return input_buffer_adapter(reinterpret_cast(&(*first)), len); - } - else - { - // the address of first cannot be used: use nullptr - return input_buffer_adapter(nullptr, len); - } -} - -inline wide_string_input_adapter input_adapter(const std::wstring& ws) -{ - return wide_string_input_adapter(ws); -} - - -inline wide_string_input_adapter input_adapter(const std::u16string& ws) -{ - return wide_string_input_adapter(ws); -} - -inline wide_string_input_adapter input_adapter(const std::u32string& ws) -{ - return wide_string_input_adapter(ws); -} - -template::value and - std::is_base_of()))>::iterator_category>::value, - int>::type = 0> -input_buffer_adapter input_adapter(const ContiguousContainer& c) -{ - return input_adapter(std::begin(c), std::end(c)); -} - - -template -input_buffer_adapter input_adapter(T (&array)[N]) -{ - return input_adapter(std::begin(array), std::end(array)); -} // This class only handles inputs of input_buffer_adapter type. // It's required so that expressions like {ptr, len} can be implicitely casted @@ -436,7 +414,7 @@ class span_input_adapter sizeof(typename std::remove_pointer::type) == 1, int>::type = 0> span_input_adapter(CharT b, std::size_t l) - : ia(reinterpret_cast(b), l) {} + : ia(reinterpret_cast(b), reinterpret_cast(b) + l) {} template::type) == 1, int>::type = 0> span_input_adapter(CharT b) - : span_input_adapter(reinterpret_cast(b), - std::strlen(reinterpret_cast(b))) {} + : span_input_adapter(b, std::strlen(reinterpret_cast(b))) {} template + JSON_HEDLEY_WARN_UNUSED_RESULT + static basic_json parse(IteratorType begin, + IteratorType end, + const parser_callback_t cb = nullptr, + const bool allow_exceptions = true) + { + basic_json result; + parser(detail::input_adapter(std::move(begin), std::move(end)), cb, allow_exceptions).parse(true, result); + return result; + } JSON_HEDLEY_WARN_UNUSED_RESULT static basic_json parse(detail::span_input_adapter&& i, @@ -6638,10 +6649,17 @@ class basic_json return parser(detail::input_adapter(std::forward(i))).accept(true); } + template + static bool accept(IteratorType begin, IteratorType end) + { + return parser(detail::input_adapter(std::move(begin), std::move(end))).accept(true); + } + static bool accept(detail::span_input_adapter&& i) { return parser(i.get()).accept(true); } + /*! @brief generate SAX events @@ -6695,7 +6713,7 @@ class basic_json @since version 3.2.0 */ - template + template JSON_HEDLEY_NON_NULL(2) static bool sax_parse(InputType&& i, SAX* sax, input_format_t format = input_format_t::json, @@ -6707,6 +6725,18 @@ class basic_json : detail::binary_reader(std::move(ia)).sax_parse(format, sax, strict); } + template + JSON_HEDLEY_NON_NULL(3) + static bool sax_parse(IteratorType first, IteratorType last, SAX* sax, + input_format_t format = input_format_t::json, + const bool strict = true) + { + auto ia = detail::input_adapter(std::move(first), std::move(last)); + return format == input_format_t::json + ? parser(std::move(ia)).sax_parse(sax, strict) + : detail::binary_reader(std::move(ia)).sax_parse(format, sax, strict); + } + template JSON_HEDLEY_NON_NULL(2) static bool sax_parse(detail::span_input_adapter&& i, SAX* sax, @@ -6720,86 +6750,7 @@ class basic_json } - /*! - @brief deserialize from an iterator range with contiguous storage - - This function reads from an iterator range of a container with contiguous - storage of 1-byte values. Compatible container types include - `std::vector`, `std::string`, `std::array`, `std::valarray`, and - `std::initializer_list`. Furthermore, C-style arrays can be used with - `std::begin()`/`std::end()`. User-defined containers can be used as long - as they implement random-access iterators and a contiguous storage. - - @pre The iterator range is contiguous. Violating this precondition yields - undefined behavior. **This precondition is enforced with an assertion.** - @pre Each element in the range has a size of 1 byte. Violating this - precondition yields undefined behavior. **This precondition is enforced - with a static assertion.** - - @warning There is no way to enforce all preconditions at compile-time. If - the function is called with noncompliant iterators and with - assertions switched off, the behavior is undefined and will most - likely yield segmentation violation. - - @tparam IteratorType iterator of container with contiguous storage - @param[in] first begin of the range to parse (included) - @param[in] last end of the range to parse (excluded) - @param[in] cb a parser callback function of type @ref parser_callback_t - which is used to control the deserialization by filtering unwanted values - (optional) - @param[in] allow_exceptions whether to throw exceptions in case of a - parse error (optional, true by default) - - @return deserialized JSON value; in case of a parse error and - @a allow_exceptions set to `false`, the return value will be - value_t::discarded. - - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - - @complexity Linear in the length of the input. The parser is a predictive - LL(1) parser. The complexity can be higher if the parser callback function - @a cb has a super-linear complexity. - - @note A UTF-8 byte order mark is silently ignored. - - @liveexample{The example below demonstrates the `parse()` function reading - from an iterator range.,parse__iteratortype__parser_callback_t} - - @since version 2.0.3 - */ - template::iterator_category>::value, int>::type = 0> - static basic_json parse(IteratorType first, IteratorType last, - const parser_callback_t cb = nullptr, - const bool allow_exceptions = true) - { - basic_json result; - parser(detail::input_adapter(first, last), cb, allow_exceptions).parse(true, result); - return result; - } - - template::iterator_category>::value, int>::type = 0> - static bool accept(IteratorType first, IteratorType last) - { - return parser(detail::input_adapter(first, last)).accept(true); - } - - template::iterator_category>::value, int>::type = 0> - JSON_HEDLEY_NON_NULL(3) - static bool sax_parse(IteratorType first, IteratorType last, SAX* sax) - { - return parser(detail::input_adapter(first, last)).sax_parse(sax); - } + /*! @brief deserialize from stream @@ -7449,16 +7400,16 @@ class basic_json /*! @copydoc from_cbor(detail::input_adapter&&, const bool, const bool) */ - template::value, int> = 0> + template JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_cbor(A1 && a1, A2 && a2, + static basic_json from_cbor(IteratorType first, IteratorType last, const bool strict = true, const bool allow_exceptions = true) { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(detail::span_input_adapter(std::forward(a1), std::forward(a2)).get()).sax_parse(input_format_t::cbor, &sdp, strict); + auto ia = detail::input_adapter(std::move(first), std::move(last)); + const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::cbor, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -7469,7 +7420,8 @@ class basic_json { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(i.get()).sax_parse(input_format_t::cbor, &sdp, strict); + auto ia = i.get(); + const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::cbor, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -7575,16 +7527,16 @@ class basic_json /*! @copydoc from_msgpack(detail::input_adapter&&, const bool, const bool) */ - template::value, int> = 0> + template JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_msgpack(A1 && a1, A2 && a2, + static basic_json from_msgpack(IteratorType first, IteratorType last, const bool strict = true, const bool allow_exceptions = true) { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(detail::span_input_adapter(std::forward(a1), std::forward(a2)).get()).sax_parse(input_format_t::msgpack, &sdp, strict); + auto ia = detail::input_adapter(std::move(first), std::move(last)); + const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::msgpack, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -7596,7 +7548,8 @@ class basic_json { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(i.get()).sax_parse(input_format_t::msgpack, &sdp, strict); + auto ia = i.get(); + const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::msgpack, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -7678,16 +7631,16 @@ class basic_json /*! @copydoc from_ubjson(detail::input_adapter&&, const bool, const bool) */ - template::value, int> = 0> + template JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_ubjson(A1 && a1, A2 && a2, + static basic_json from_ubjson(IteratorType first, IteratorType last, const bool strict = true, const bool allow_exceptions = true) { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(detail::span_input_adapter(std::forward(a1), std::forward(a2)).get()).sax_parse(input_format_t::ubjson, &sdp, strict); + auto ia = detail::input_adapter(std::move(first), std::move(last)); + const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::ubjson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -7698,7 +7651,8 @@ class basic_json { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(i.get()).sax_parse(input_format_t::ubjson, &sdp, strict); + auto ia = i.get(); + const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::ubjson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -7779,16 +7733,16 @@ class basic_json /*! @copydoc from_bson(detail::input_adapter&&, const bool, const bool) */ - template::value, int> = 0> + template JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_bson(A1 && a1, A2 && a2, + static basic_json from_bson(IteratorType first, IteratorType last, const bool strict = true, const bool allow_exceptions = true) { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(detail::span_input_adapter(std::forward(a1), std::forward(a2)).get()).sax_parse(input_format_t::bson, &sdp, strict); + auto ia = detail::input_adapter(std::move(first), std::move(last)); + const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::bson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -7799,7 +7753,8 @@ class basic_json { basic_json result; detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(i.get()).sax_parse(input_format_t::bson, &sdp, strict); + auto ia = i.get(); + const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::bson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } /// @} diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 70ce011a..eeaa9d0a 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -4456,6 +4456,8 @@ Input adapter for stdio file access. This adapter read only 1 byte and do not us class file_input_adapter { public: + using char_type = char; + JSON_HEDLEY_NON_NULL(2) explicit file_input_adapter(std::FILE* f) noexcept : m_file(f) @@ -4490,6 +4492,8 @@ subsequent call for input from the std::istream. class input_stream_adapter { public: + using char_type = char; + ~input_stream_adapter() { // clear stream flags; we use underlying streambuf I/O, do not @@ -4535,51 +4539,55 @@ class input_stream_adapter std::streambuf* sb = nullptr; }; -/// input adapter for buffer input -class input_buffer_adapter +// General-purpose iterator-based adapter. It might not be as fast as +// theoretically possible for some containers, but it is extremely versatile. +template +class iterator_input_adapter { public: - input_buffer_adapter(const char* b, const std::size_t l) noexcept - : cursor(b), limit(b == nullptr ? nullptr : (b + l)) - {} + using char_type = typename std::iterator_traits::value_type; - // delete because of pointer members - input_buffer_adapter(const input_buffer_adapter&) = delete; - input_buffer_adapter& operator=(input_buffer_adapter&) = delete; - input_buffer_adapter(input_buffer_adapter&&) = default; - input_buffer_adapter& operator=(input_buffer_adapter&&) = delete; + iterator_input_adapter(IteratorType begin_ite, IteratorType end_ite) + : current(std::move(begin_ite)), end(std::move(end_ite)) {} - std::char_traits::int_type get_character() noexcept + typename std::char_traits::int_type get_character() { - if (JSON_HEDLEY_LIKELY(cursor < limit)) + if (current != end) { - assert(cursor != nullptr and limit != nullptr); - return std::char_traits::to_int_type(*(cursor++)); + return *current++; } + else + { + return std::char_traits::eof(); + } + } - return std::char_traits::eof(); + bool empty() const + { + return current == end; } private: - /// pointer to the current character - const char* cursor; - /// pointer past the last character - const char* const limit; + IteratorType current; + IteratorType end; }; -template -struct wide_string_input_helper + +template +struct wide_string_input_helper; + +template +struct wide_string_input_helper { // UTF-32 - static void fill_buffer(const WideStringType& str, - size_t& current_wchar, + static void fill_buffer(BaseInputAdapter& input, std::array::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) { utf8_bytes_index = 0; - if (current_wchar == str.size()) + if (input.empty()) { utf8_bytes[0] = std::char_traits::eof(); utf8_bytes_filled = 1; @@ -4587,7 +4595,7 @@ struct wide_string_input_helper else { // get the current character - const auto wc = static_cast(str[current_wchar++]); + const auto wc = input.get_character(); // UTF-32 to UTF-8 encoding if (wc < 0x80) @@ -4626,19 +4634,18 @@ struct wide_string_input_helper } }; -template -struct wide_string_input_helper +template +struct wide_string_input_helper { // UTF-16 - static void fill_buffer(const WideStringType& str, - size_t& current_wchar, + static void fill_buffer(BaseInputAdapter& input, std::array::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) { utf8_bytes_index = 0; - if (current_wchar == str.size()) + if (input.empty()) { utf8_bytes[0] = std::char_traits::eof(); utf8_bytes_filled = 1; @@ -4646,7 +4653,7 @@ struct wide_string_input_helper else { // get the current character - const auto wc = static_cast(str[current_wchar++]); + const auto wc = input.get_character(); // UTF-16 to UTF-8 encoding if (wc < 0x80) @@ -4669,9 +4676,9 @@ struct wide_string_input_helper } else { - if (current_wchar < str.size()) + if (!input.empty()) { - const auto wc2 = static_cast(str[current_wchar++]); + const auto wc2 = static_cast(input.get_character()); const auto charcode = 0x10000u + (((wc & 0x3FFu) << 10u) | (wc2 & 0x3FFu)); utf8_bytes[0] = static_cast::int_type>(0xF0u | (charcode >> 18u)); utf8_bytes[1] = static_cast::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu)); @@ -4681,8 +4688,6 @@ struct wide_string_input_helper } else { - // unknown character - ++current_wchar; utf8_bytes[0] = static_cast::int_type>(wc); utf8_bytes_filled = 1; } @@ -4691,20 +4696,20 @@ struct wide_string_input_helper } }; -template +// Wraps another input apdater to convert wide character types into individual bytes. +template class wide_string_input_adapter { public: - explicit wide_string_input_adapter(const WideStringType& w) noexcept - : str(w) - {} + wide_string_input_adapter(BaseInputAdapter base) + : base_adapter(base) {} - std::char_traits::int_type get_character() noexcept + typename std::char_traits::int_type get_character() noexcept { // check if buffer needs to be filled if (utf8_bytes_index == utf8_bytes_filled) { - fill_buffer(); + fill_buffer(); assert(utf8_bytes_filled > 0); assert(utf8_bytes_index == 0); @@ -4717,18 +4722,14 @@ class wide_string_input_adapter } private: + BaseInputAdapter base_adapter; + template void fill_buffer() { - wide_string_input_helper::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled); + wide_string_input_helper::fill_buffer(base_adapter, utf8_bytes, utf8_bytes_index, utf8_bytes_filled); } - /// the wstring to process - const WideStringType& str; - - /// index of the current wchar in str - std::size_t current_wchar = 0; - /// a buffer for UTF-8 bytes std::array::int_type, 4> utf8_bytes = {{0, 0, 0, 0}}; @@ -4736,8 +4737,53 @@ class wide_string_input_adapter std::size_t utf8_bytes_index = 0; /// number of valid bytes in the utf8_codes array std::size_t utf8_bytes_filled = 0; +} + + +template +struct iterator_input_adapter_factory +{ + using iterator_type = IteratorType; + using char_type = typename std::iterator_traits::value_type; + using adapter_type = iterator_input_adapter; + + adapter_type create(IteratorType begin, IteratorType end) + { + return adapter_type(std::move(begin), std::mve(end)); + } }; +template +struct iterator_input_adapter_factory::value_type)>1)>::type > + { + + using iterator_type = IteratorType; + using char_type = typename std::iterator_traits::value_type; + using base_adapter_type = iterator_input_adapter; + using adapter_type = wide_string_input_adapter; + + adapter_type create(IteratorType begin, IteratorType end) +{ + return adapter_type(base_adapter_type(std::move(begin), std::mve(end))); +} + }; + +// General purpose iterator-based input +template +typename iterator_input_adapter_factory::adapter_type input_adapter(IteratorType begin, IteratorType end) +{ + return iterator_input_adapter_factory::create(begin, end); +} + +// Convenience shorthand from container to iterator +template +decltype(input_adapter(begin(container), end(container))) input_adapter(const T& container) +{ + return input_adapter(begin(container), end(container)); +} + +// Special cases with fast paths inline file_input_adapter input_adapter(std::FILE* file) { return file_input_adapter(file); @@ -4753,17 +4799,7 @@ inline input_stream_adapter input_adapter(std::istream&& stream) return input_stream_adapter(stream); } -template::value and - std::is_integral::type>::value and - sizeof(typename std::remove_pointer::type) == 1, - int>::type = 0> -input_buffer_adapter input_adapter(CharT b, std::size_t l) -{ - return input_buffer_adapter(reinterpret_cast(b), l); -} - +// Null-delimited strings, and the like. template::value and @@ -4772,78 +4808,11 @@ template::type = 0> input_buffer_adapter input_adapter(CharT b) { - return input_adapter(reinterpret_cast(b), - std::strlen(reinterpret_cast(b))); + auto length = std::strlen(reinterpret_cast(b)); + auto ptr = reinterpret_cast(b); + return input_adapter(ptr, ptr + length); } -template::iterator_category, std::random_access_iterator_tag>::value, - int>::type = 0> -input_buffer_adapter input_adapter(IteratorType first, IteratorType last) -{ -#ifndef NDEBUG - // assertion to check that the iterator range is indeed contiguous, - // see https://stackoverflow.com/a/35008842/266378 for more discussion - const auto is_contiguous = std::accumulate( - first, last, std::pair(true, 0), - [&first](std::pair res, decltype(*first) val) - { - res.first &= (val == *(std::next(std::addressof(*first), res.second++))); - return res; - }).first; - assert(is_contiguous); -#endif - - // assertion to check that each element is 1 byte long - static_assert( - sizeof(typename iterator_traits::value_type) == 1, - "each element in the iterator range must have the size of 1 byte"); - - const auto len = static_cast(std::distance(first, last)); - if (JSON_HEDLEY_LIKELY(len > 0)) - { - // there is at least one element: use the address of first - return input_buffer_adapter(reinterpret_cast(&(*first)), len); - } - else - { - // the address of first cannot be used: use nullptr - return input_buffer_adapter(nullptr, len); - } -} - -inline wide_string_input_adapter input_adapter(const std::wstring& ws) -{ - return wide_string_input_adapter(ws); -} - - -inline wide_string_input_adapter input_adapter(const std::u16string& ws) -{ - return wide_string_input_adapter(ws); -} - -inline wide_string_input_adapter input_adapter(const std::u32string& ws) -{ - return wide_string_input_adapter(ws); -} - -template::value and - std::is_base_of()))>::iterator_category>::value, - int>::type = 0> -input_buffer_adapter input_adapter(const ContiguousContainer& c) -{ - return input_adapter(std::begin(c), std::end(c)); -} - - -template -input_buffer_adapter input_adapter(T (&array)[N]) -{ - return input_adapter(std::begin(array), std::end(array)); -} // This class only handles inputs of input_buffer_adapter type. // It's required so that expressions like {ptr, len} can be implicitely casted @@ -22413,6 +22382,17 @@ class basic_json } + template + JSON_HEDLEY_WARN_UNUSED_RESULT + static basic_json parse(IteratorType begin, + IteratorType end, + const parser_callback_t cb = nullptr, + const bool allow_exceptions = true) + { + basic_json result; + parser(detail::iterator_input_adapter(std::move(begin), std::move(end)), cb, allow_exceptions).parse(true, result); + return result; + } JSON_HEDLEY_WARN_UNUSED_RESULT static basic_json parse(detail::span_input_adapter&& i, @@ -22430,10 +22410,17 @@ class basic_json return parser(detail::input_adapter(std::forward(i))).accept(true); } + template + static bool accept(IteratorType begin, IteratorType end) + { + return parser(detail::iterator_input_adapter(std::move(begin), std::move(end))).accept(true); + } + static bool accept(detail::span_input_adapter&& i) { return parser(i.get()).accept(true); } + /*! @brief generate SAX events diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 838cce53..67377fc4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -133,6 +133,7 @@ set(files src/unit-ubjson.cpp src/unit-udt.cpp src/unit-unicode.cpp + src/unit-user_defined_input.cpp src/unit-wstring.cpp) foreach(file ${files}) diff --git a/test/Makefile b/test/Makefile index 7bf0fef9..bcf8de7d 100644 --- a/test/Makefile +++ b/test/Makefile @@ -44,6 +44,7 @@ SOURCES = src/unit.cpp \ src/unit-testsuites.cpp \ src/unit-ubjson.cpp \ src/unit-unicode.cpp \ + src/unit-user_defined_input.cpp \ src/unit-wstring.cpp OBJECTS = $(SOURCES:.cpp=.o) diff --git a/test/src/unit-user_defined_input.cpp b/test/src/unit-user_defined_input.cpp new file mode 100644 index 00000000..6948c617 --- /dev/null +++ b/test/src/unit-user_defined_input.cpp @@ -0,0 +1,115 @@ +/* + __ _____ _____ _____ + __| | __| | | | JSON for Modern C++ (test suite) +| | |__ | | | | | | version 3.7.3 +|_____|_____|_____|_|___| https://github.com/nlohmann/json + +Licensed under the MIT License . +SPDX-License-Identifier: MIT +Copyright (c) 2013-2019 Niels Lohmann . + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include "doctest_compatibility.h" + +#include +using nlohmann::json; + +#include + +namespace +{ +TEST_CASE("Use arbitrary stdlib container") +{ + std::string raw_data = "[1,2,3,4]"; + std::list data(raw_data.begin(), raw_data.end()); + + json as_json = json::parse(data.begin(), data.end()); + CHECK(as_json.at(0) == 1); + CHECK(as_json.at(1) == 2); + CHECK(as_json.at(2) == 3); + CHECK(as_json.at(3) == 4); +} + +struct MyContainer +{ + const char* data; +}; + +const char* begin(const MyContainer& c) +{ + return c.data; +} + +const char* end(const MyContainer& c) +{ + return c.data + strlen(c.data); +} + +TEST_CASE("Custom container") +{ + + MyContainer data{"[1,2,3,4]"}; + json as_json = json::parse(data); + CHECK(as_json.at(0) == 1); + CHECK(as_json.at(1) == 2); + CHECK(as_json.at(2) == 3); + CHECK(as_json.at(3) == 4); + +} + +TEST_CASE("Custom iterator") +{ + const char* raw_data = "[1,2,3,4]"; + + struct MyIterator { + using difference_type = std::size_t; + using value_type = char; + using pointer = const char*; + using reference = const char&; + using iterator_category = std::input_iterator_tag; + + + MyIterator& operator++() { + ++ptr; + return *this; + } + + reference operator*() const {return *ptr;} + bool operator!=(const MyIterator& rhs) const { + return ptr != rhs.ptr; + } + + const char* ptr; + }; + + MyIterator begin{raw_data}; + MyIterator end{raw_data + strlen(raw_data)}; + + json as_json = json::parse(begin, end); + CHECK(as_json.at(0) == 1); + CHECK(as_json.at(1) == 2); + CHECK(as_json.at(2) == 3); + CHECK(as_json.at(3) == 4); +} + + + +} \ No newline at end of file