#pragma once #include <array> // array #include <cassert> // assert #include <cstddef> // size_t #include <cstdio> //FILE * #include <cstring> // strlen #include <istream> // istream #include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next #include <memory> // shared_ptr, make_shared, addressof #include <numeric> // accumulate #include <string> // string, char_traits #include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer #include <utility> // pair, declval #include <nlohmann/detail/iterators/iterator_traits.hpp> #include <nlohmann/detail/macro_scope.hpp> namespace nlohmann { namespace detail { /// the supported input formats enum class input_format_t { json, cbor, msgpack, ubjson, bson }; //////////////////// // input adapters // //////////////////// /*! @brief abstract input adapter interface Produces a stream of std::char_traits<char>::int_type characters from a std::istream, a buffer, or some other input type. Accepts the return of exactly one non-EOF character for future input. The int_type characters returned consist of all valid char values as positive values (typically unsigned char), plus an EOF value outside that range, specified by the value of the function std::char_traits<char>::eof(). This value is typically -1, but could be any arbitrary value which is not a valid char value. */ struct input_adapter_protocol { /// get a character [0,255] or std::char_traits<char>::eof(). virtual std::char_traits<char>::int_type get_character() = 0; virtual ~input_adapter_protocol() = default; }; /// a type to simplify interfaces using input_adapter_t = std::shared_ptr<input_adapter_protocol>; /*! Input adapter for stdio file access. This adapter read only 1 byte and do not use any buffer. This adapter is a very low level adapter. */ class file_input_adapter : public input_adapter_protocol { public: JSON_HEDLEY_NON_NULL(2) explicit file_input_adapter(std::FILE* f) noexcept : m_file(f) {} // make class move-only file_input_adapter(const file_input_adapter&) = delete; file_input_adapter(file_input_adapter&&) = default; file_input_adapter& operator=(const file_input_adapter&) = delete; file_input_adapter& operator=(file_input_adapter&&) = default; ~file_input_adapter() override = default; std::char_traits<char>::int_type get_character() noexcept override { return std::fgetc(m_file); } private: /// the file pointer to read from std::FILE* m_file; }; /*! Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at beginning of input. Does not support changing the underlying std::streambuf in mid-input. Maintains underlying std::istream and std::streambuf to support subsequent use of standard std::istream operations to process any input characters following those used in parsing the JSON input. Clears the std::istream flags; any input errors (e.g., EOF) will be detected by the first subsequent call for input from the std::istream. */ class input_stream_adapter : public input_adapter_protocol { public: ~input_stream_adapter() override { // clear stream flags; we use underlying streambuf I/O, do not // maintain ifstream flags, except eof is.clear(is.rdstate() & std::ios::eofbit); } explicit input_stream_adapter(std::istream& i) : is(i), sb(*i.rdbuf()) {} // delete because of pointer members input_stream_adapter(const input_stream_adapter&) = delete; input_stream_adapter& operator=(input_stream_adapter&) = delete; input_stream_adapter(input_stream_adapter&&) = delete; input_stream_adapter& operator=(input_stream_adapter&&) = delete; // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to // ensure that std::char_traits<char>::eof() and the character 0xFF do not // end up as the same value, eg. 0xFFFFFFFF. std::char_traits<char>::int_type get_character() override { auto res = sb.sbumpc(); // set eof manually, as we don't use the istream interface. if (res == EOF) { is.clear(is.rdstate() | std::ios::eofbit); } return res; } private: /// the associated input stream std::istream& is; std::streambuf& sb; }; /// input adapter for buffer input class input_buffer_adapter : public input_adapter_protocol { public: input_buffer_adapter(const char* b, const std::size_t l) noexcept : cursor(b), limit(b == nullptr ? nullptr : (b + l)) {} // delete because of pointer members input_buffer_adapter(const input_buffer_adapter&) = delete; input_buffer_adapter& operator=(input_buffer_adapter&) = delete; input_buffer_adapter(input_buffer_adapter&&) = delete; input_buffer_adapter& operator=(input_buffer_adapter&&) = delete; ~input_buffer_adapter() override = default; std::char_traits<char>::int_type get_character() noexcept override { if (JSON_HEDLEY_LIKELY(cursor < limit)) { assert(cursor != nullptr and limit != nullptr); return std::char_traits<char>::to_int_type(*(cursor++)); } return std::char_traits<char>::eof(); } private: /// pointer to the current character const char* cursor; /// pointer past the last character const char* const limit; }; template<typename WideStringType, size_t T> struct wide_string_input_helper { // UTF-32 static void fill_buffer(const WideStringType& str, size_t& current_wchar, std::array<std::char_traits<char>::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) { utf8_bytes_index = 0; if (current_wchar == str.size()) { utf8_bytes[0] = std::char_traits<char>::eof(); utf8_bytes_filled = 1; } else { // get the current character const auto wc = static_cast<unsigned int>(str[current_wchar++]); // UTF-32 to UTF-8 encoding if (wc < 0x80) { utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc); utf8_bytes_filled = 1; } else if (wc <= 0x7FF) { utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((wc >> 6u) & 0x1Fu)); utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu)); utf8_bytes_filled = 2; } else if (wc <= 0xFFFF) { utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((wc >> 12u) & 0x0Fu)); utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 6u) & 0x3Fu)); utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu)); utf8_bytes_filled = 3; } else if (wc <= 0x10FFFF) { utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | ((wc >> 18u) & 0x07u)); utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 12u) & 0x3Fu)); utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 6u) & 0x3Fu)); utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu)); utf8_bytes_filled = 4; } else { // unknown character utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc); utf8_bytes_filled = 1; } } } }; template<typename WideStringType> struct wide_string_input_helper<WideStringType, 2> { // UTF-16 static void fill_buffer(const WideStringType& str, size_t& current_wchar, std::array<std::char_traits<char>::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) { utf8_bytes_index = 0; if (current_wchar == str.size()) { utf8_bytes[0] = std::char_traits<char>::eof(); utf8_bytes_filled = 1; } else { // get the current character const auto wc = static_cast<unsigned int>(str[current_wchar++]); // UTF-16 to UTF-8 encoding if (wc < 0x80) { utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc); utf8_bytes_filled = 1; } else if (wc <= 0x7FF) { utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((wc >> 6u))); utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu)); utf8_bytes_filled = 2; } else if (0xD800 > wc or wc >= 0xE000) { utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((wc >> 12u))); utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 6u) & 0x3Fu)); utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu)); utf8_bytes_filled = 3; } else { if (current_wchar < str.size()) { const auto wc2 = static_cast<unsigned int>(str[current_wchar++]); const auto charcode = 0x10000u + (((wc & 0x3FFu) << 10u) | (wc2 & 0x3FFu)); utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | (charcode >> 18u)); utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu)); utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu)); utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (charcode & 0x3Fu)); utf8_bytes_filled = 4; } else { // unknown character ++current_wchar; utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc); utf8_bytes_filled = 1; } } } } }; template<typename WideStringType> class wide_string_input_adapter : public input_adapter_protocol { public: explicit wide_string_input_adapter(const WideStringType& w) noexcept : str(w) {} std::char_traits<char>::int_type get_character() noexcept override { // check if buffer needs to be filled if (utf8_bytes_index == utf8_bytes_filled) { fill_buffer<sizeof(typename WideStringType::value_type)>(); assert(utf8_bytes_filled > 0); assert(utf8_bytes_index == 0); } // use buffer assert(utf8_bytes_filled > 0); assert(utf8_bytes_index < utf8_bytes_filled); return utf8_bytes[utf8_bytes_index++]; } private: template<size_t T> void fill_buffer() { wide_string_input_helper<WideStringType, T>::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled); } /// the wstring to process const WideStringType& str; /// index of the current wchar in str std::size_t current_wchar = 0; /// a buffer for UTF-8 bytes std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}}; /// index to the utf8_codes array for the next valid byte std::size_t utf8_bytes_index = 0; /// number of valid bytes in the utf8_codes array std::size_t utf8_bytes_filled = 0; }; class input_adapter { public: // native support JSON_HEDLEY_NON_NULL(2) input_adapter(std::FILE* file) : ia(std::make_shared<file_input_adapter>(file)) {} /// input adapter for input stream input_adapter(std::istream& i) : ia(std::make_shared<input_stream_adapter>(i)) {} /// input adapter for input stream input_adapter(std::istream&& i) : ia(std::make_shared<input_stream_adapter>(i)) {} input_adapter(const std::wstring& ws) : ia(std::make_shared<wide_string_input_adapter<std::wstring>>(ws)) {} input_adapter(const std::u16string& ws) : ia(std::make_shared<wide_string_input_adapter<std::u16string>>(ws)) {} input_adapter(const std::u32string& ws) : ia(std::make_shared<wide_string_input_adapter<std::u32string>>(ws)) {} /// input adapter for buffer template<typename CharT, typename std::enable_if< std::is_pointer<CharT>::value and std::is_integral<typename std::remove_pointer<CharT>::type>::value and sizeof(typename std::remove_pointer<CharT>::type) == 1, int>::type = 0> input_adapter(CharT b, std::size_t l) : ia(std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(b), l)) {} // derived support /// input adapter for string literal template<typename CharT, typename std::enable_if< std::is_pointer<CharT>::value and std::is_integral<typename std::remove_pointer<CharT>::type>::value and sizeof(typename std::remove_pointer<CharT>::type) == 1, int>::type = 0> input_adapter(CharT b) : input_adapter(reinterpret_cast<const char*>(b), std::strlen(reinterpret_cast<const char*>(b))) {} /// input adapter for iterator range with contiguous storage template<class IteratorType, typename std::enable_if< std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value, int>::type = 0> input_adapter(IteratorType first, IteratorType last) { #ifndef NDEBUG // assertion to check that the iterator range is indeed contiguous, // see http://stackoverflow.com/a/35008842/266378 for more discussion const auto is_contiguous = std::accumulate( first, last, std::pair<bool, int>(true, 0), [&first](std::pair<bool, int> res, decltype(*first) val) { res.first &= (val == *(std::next(std::addressof(*first), res.second++))); return res; }).first; assert(is_contiguous); #endif // assertion to check that each element is 1 byte long static_assert( sizeof(typename iterator_traits<IteratorType>::value_type) == 1, "each element in the iterator range must have the size of 1 byte"); const auto len = static_cast<size_t>(std::distance(first, last)); if (JSON_HEDLEY_LIKELY(len > 0)) { // there is at least one element: use the address of first ia = std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(&(*first)), len); } else { // the address of first cannot be used: use nullptr ia = std::make_shared<input_buffer_adapter>(nullptr, len); } } /// input adapter for array template<class T, std::size_t N> input_adapter(T (&array)[N]) : input_adapter(std::begin(array), std::end(array)) {} /// input adapter for contiguous container template<class ContiguousContainer, typename std::enable_if<not std::is_pointer<ContiguousContainer>::value and std::is_base_of<std::random_access_iterator_tag, typename iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value, int>::type = 0> input_adapter(const ContiguousContainer& c) : input_adapter(std::begin(c), std::end(c)) {} operator input_adapter_t() { return ia; } private: /// the actual adapter input_adapter_t ia = nullptr; }; } // namespace detail } // namespace nlohmann