#pragma once #include // array #include // assert #include // size_t #include //FILE * #include // strlen #include // istream #include // begin, end, iterator_traits, random_access_iterator_tag, distance, next #include // shared_ptr, make_shared, addressof #include // accumulate #include // string, char_traits #include // enable_if, is_base_of, is_pointer, is_integral, remove_pointer #include // pair, declval #include #include namespace nlohmann { namespace detail { /// the supported input formats enum class input_format_t { json, cbor, msgpack, ubjson, bson }; //////////////////// // input adapters // //////////////////// /*! Input adapter for stdio file access. This adapter read only 1 byte and do not use any buffer. This adapter is a very low level adapter. */ class file_input_adapter { public: using char_type = char; JSON_HEDLEY_NON_NULL(2) explicit file_input_adapter(std::FILE* f) noexcept : m_file(f) {} // make class move-only file_input_adapter(const file_input_adapter&) = delete; file_input_adapter(file_input_adapter&&) = default; file_input_adapter& operator=(const file_input_adapter&) = delete; file_input_adapter& operator=(file_input_adapter&&) = delete; std::char_traits::int_type get_character() noexcept { return std::fgetc(m_file); } private: /// the file pointer to read from std::FILE* m_file; }; /*! Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at beginning of input. Does not support changing the underlying std::streambuf in mid-input. Maintains underlying std::istream and std::streambuf to support subsequent use of standard std::istream operations to process any input characters following those used in parsing the JSON input. Clears the std::istream flags; any input errors (e.g., EOF) will be detected by the first subsequent call for input from the std::istream. */ class input_stream_adapter { public: using char_type = char; ~input_stream_adapter() { // clear stream flags; we use underlying streambuf I/O, do not // maintain ifstream flags, except eof if (is) { is->clear(is->rdstate() & std::ios::eofbit); } } explicit input_stream_adapter(std::istream& i) : is(&i), sb(i.rdbuf()) {} // delete because of pointer members input_stream_adapter(const input_stream_adapter&) = delete; input_stream_adapter& operator=(input_stream_adapter&) = delete; input_stream_adapter& operator=(input_stream_adapter&& rhs) = delete; input_stream_adapter(input_stream_adapter&& rhs) : is(rhs.is), sb(rhs.sb) { rhs.is = nullptr; rhs.sb = nullptr; } // std::istream/std::streambuf use std::char_traits::to_int_type, to // ensure that std::char_traits::eof() and the character 0xFF do not // end up as the same value, eg. 0xFFFFFFFF. std::char_traits::int_type get_character() { auto res = sb->sbumpc(); // set eof manually, as we don't use the istream interface. if (JSON_HEDLEY_UNLIKELY(res == EOF)) { is->clear(is->rdstate() | std::ios::eofbit); } return res; } private: /// the associated input stream std::istream* is = nullptr; std::streambuf* sb = nullptr; }; // General-purpose iterator-based adapter. It might not be as fast as // theoretically possible for some containers, but it is extremely versatile. template class iterator_input_adapter { public: using char_type = typename std::iterator_traits::value_type; iterator_input_adapter(IteratorType first, IteratorType last) : current(std::move(first)), end(std::move(last)) {} typename std::char_traits::int_type get_character() { if (JSON_HEDLEY_LIKELY(current != end)) { auto result = std::char_traits::to_int_type(*current); std::advance(current, 1); return result; } else { return std::char_traits::eof(); } } private: IteratorType current; IteratorType end; template friend class wide_string_input_helper; bool empty() const { return current == end; } }; template struct wide_string_input_helper; template struct wide_string_input_helper { // UTF-32 static void fill_buffer(BaseInputAdapter& input, std::array::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) { utf8_bytes_index = 0; if (JSON_HEDLEY_UNLIKELY(input.empty())) { utf8_bytes[0] = std::char_traits::eof(); utf8_bytes_filled = 1; } else { // get the current character const auto wc = input.get_character(); // UTF-32 to UTF-8 encoding if (wc < 0x80) { utf8_bytes[0] = static_cast::int_type>(wc); utf8_bytes_filled = 1; } else if (wc <= 0x7FF) { utf8_bytes[0] = static_cast::int_type>(0xC0u | ((wc >> 6u) & 0x1Fu)); utf8_bytes[1] = static_cast::int_type>(0x80u | (wc & 0x3Fu)); utf8_bytes_filled = 2; } else if (wc <= 0xFFFF) { utf8_bytes[0] = static_cast::int_type>(0xE0u | ((wc >> 12u) & 0x0Fu)); utf8_bytes[1] = static_cast::int_type>(0x80u | ((wc >> 6u) & 0x3Fu)); utf8_bytes[2] = static_cast::int_type>(0x80u | (wc & 0x3Fu)); utf8_bytes_filled = 3; } else if (wc <= 0x10FFFF) { utf8_bytes[0] = static_cast::int_type>(0xF0u | ((wc >> 18u) & 0x07u)); utf8_bytes[1] = static_cast::int_type>(0x80u | ((wc >> 12u) & 0x3Fu)); utf8_bytes[2] = static_cast::int_type>(0x80u | ((wc >> 6u) & 0x3Fu)); utf8_bytes[3] = static_cast::int_type>(0x80u | (wc & 0x3Fu)); utf8_bytes_filled = 4; } else { // unknown character utf8_bytes[0] = static_cast::int_type>(wc); utf8_bytes_filled = 1; } } } }; template struct wide_string_input_helper { // UTF-16 static void fill_buffer(BaseInputAdapter& input, std::array::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) { utf8_bytes_index = 0; if (JSON_HEDLEY_UNLIKELY(input.empty())) { utf8_bytes[0] = std::char_traits::eof(); utf8_bytes_filled = 1; } else { // get the current character const auto wc = input.get_character(); // UTF-16 to UTF-8 encoding if (wc < 0x80) { utf8_bytes[0] = static_cast::int_type>(wc); utf8_bytes_filled = 1; } else if (wc <= 0x7FF) { utf8_bytes[0] = static_cast::int_type>(0xC0u | ((wc >> 6u))); utf8_bytes[1] = static_cast::int_type>(0x80u | (wc & 0x3Fu)); utf8_bytes_filled = 2; } else if (0xD800 > wc or wc >= 0xE000) { utf8_bytes[0] = static_cast::int_type>(0xE0u | ((wc >> 12u))); utf8_bytes[1] = static_cast::int_type>(0x80u | ((wc >> 6u) & 0x3Fu)); utf8_bytes[2] = static_cast::int_type>(0x80u | (wc & 0x3Fu)); utf8_bytes_filled = 3; } else { if (JSON_HEDLEY_UNLIKELY(not input.empty())) { const auto wc2 = static_cast(input.get_character()); const auto charcode = 0x10000u + (((wc & 0x3FFu) << 10u) | (wc2 & 0x3FFu)); utf8_bytes[0] = static_cast::int_type>(0xF0u | (charcode >> 18u)); utf8_bytes[1] = static_cast::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu)); utf8_bytes[2] = static_cast::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu)); utf8_bytes[3] = static_cast::int_type>(0x80u | (charcode & 0x3Fu)); utf8_bytes_filled = 4; } else { utf8_bytes[0] = static_cast::int_type>(wc); utf8_bytes_filled = 1; } } } } }; // Wraps another input apdater to convert wide character types into individual bytes. template class wide_string_input_adapter { public: wide_string_input_adapter(BaseInputAdapter base) : base_adapter(base) {} typename std::char_traits::int_type get_character() noexcept { // check if buffer needs to be filled if (utf8_bytes_index == utf8_bytes_filled) { fill_buffer(); assert(utf8_bytes_filled > 0); assert(utf8_bytes_index == 0); } // use buffer assert(utf8_bytes_filled > 0); assert(utf8_bytes_index < utf8_bytes_filled); return utf8_bytes[utf8_bytes_index++]; } private: BaseInputAdapter base_adapter; template void fill_buffer() { wide_string_input_helper::fill_buffer(base_adapter, utf8_bytes, utf8_bytes_index, utf8_bytes_filled); } /// a buffer for UTF-8 bytes std::array::int_type, 4> utf8_bytes = {{0, 0, 0, 0}}; /// index to the utf8_codes array for the next valid byte std::size_t utf8_bytes_index = 0; /// number of valid bytes in the utf8_codes array std::size_t utf8_bytes_filled = 0; }; template struct iterator_input_adapter_factory { using iterator_type = IteratorType; using char_type = typename std::iterator_traits::value_type; using adapter_type = iterator_input_adapter; static adapter_type create(IteratorType first, IteratorType last) { return adapter_type(std::move(first), std::move(last)); } }; // This test breaks astyle formatting when inlined in a template specialization. template inline constexpr bool is_iterator_of_multibyte() { return sizeof(typename std::iterator_traits::value_type) > 1; } template struct iterator_input_adapter_factory()>> { using iterator_type = IteratorType; using char_type = typename std::iterator_traits::value_type; using base_adapter_type = iterator_input_adapter; using adapter_type = wide_string_input_adapter; static adapter_type create(IteratorType first, IteratorType last) { return adapter_type(base_adapter_type(std::move(first), std::move(last))); } }; // General purpose iterator-based input template typename iterator_input_adapter_factory::adapter_type input_adapter(IteratorType first, IteratorType last) { using factory_type = iterator_input_adapter_factory; return factory_type::create(first, last); } // Convenience shorthand from container to iterator template auto input_adapter(const ContainerType& container) -> decltype(input_adapter(begin(container), end(container))) { // Enable ADL using std::begin; using std::end; return input_adapter(begin(container), end(container)); } // Special cases with fast paths inline file_input_adapter input_adapter(std::FILE* file) { return file_input_adapter(file); } inline input_stream_adapter input_adapter(std::istream& stream) { return input_stream_adapter(stream); } inline input_stream_adapter input_adapter(std::istream&& stream) { return input_stream_adapter(stream); } using contiguous_bytes_input_adapter = decltype(input_adapter(std::declval(), std::declval())); // Null-delimited strings, and the like. template < typename CharT, typename std::enable_if < std::is_pointer::value and not std::is_array::value and std::is_integral::type>::value and sizeof(typename std::remove_pointer::type) == 1, int >::type = 0 > contiguous_bytes_input_adapter input_adapter(CharT b) { auto length = std::strlen(reinterpret_cast(b)); auto ptr = reinterpret_cast(b); return input_adapter(ptr, ptr + length); } template auto input_adapter(T (&array)[N]) -> decltype(input_adapter(array, array + N)) { return input_adapter(array, array + N); } // This class only handles inputs of input_buffer_adapter type. // It's required so that expressions like {ptr, len} can be implicitely casted // to the correct adapter. class span_input_adapter { public: template::value and std::is_integral::type>::value and sizeof(typename std::remove_pointer::type) == 1, int>::type = 0> span_input_adapter(CharT b, std::size_t l) : ia(reinterpret_cast(b), reinterpret_cast(b) + l) {} template::iterator_category, std::random_access_iterator_tag>::value, int>::type = 0> span_input_adapter(IteratorType first, IteratorType last) : ia(input_adapter(first, last)) {} contiguous_bytes_input_adapter&& get() { return std::move(ia); } private: contiguous_bytes_input_adapter ia; }; } // namespace detail } // namespace nlohmann