diff --git a/Makefile b/Makefile index 0e6a8b39..9a3060af 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,8 @@ SRCS = ${SRCDIR}/json.hpp \ ${SRCDIR}/detail/exceptions.hpp \ ${SRCDIR}/detail/value_t.hpp \ ${SRCDIR}/detail/conversions/from_json.hpp \ - ${SRCDIR}/detail/conversions/to_json.hpp + ${SRCDIR}/detail/conversions/to_json.hpp \ + ${SRCDIR}/detail/parsing/input_adapters.hpp diff --git a/src/detail/parsing/input_adapters.hpp b/src/detail/parsing/input_adapters.hpp new file mode 100644 index 00000000..17cf5892 --- /dev/null +++ b/src/detail/parsing/input_adapters.hpp @@ -0,0 +1,258 @@ +#ifndef NLOHMANN_JSON_DETAIL_PARSING_INPUT_ADAPTERS_HPP +#define NLOHMANN_JSON_DETAIL_PARSING_INPUT_ADAPTERS_HPP + +#include +#include +#include +#include +#include +#include + +#include "detail/macro_scope.hpp" + +namespace nlohmann +{ +namespace detail +{ +//////////////////// +// input adapters // +//////////////////// + +/*! +@brief abstract input adapter interface + +Produces a stream of std::char_traits::int_type characters from a +std::istream, a buffer, or some other input type. Accepts the return of exactly +one non-EOF character for future input. The int_type characters returned +consist of all valid char values as positive values (typically unsigned char), +plus an EOF value outside that range, specified by the value of the function +std::char_traits::eof(). This value is typically -1, but could be any +arbitrary value which is not a valid char value. +*/ +struct input_adapter_protocol +{ + /// get a character [0,255] or std::char_traits::eof(). + virtual std::char_traits::int_type get_character() = 0; + /// restore the last non-eof() character to input + virtual void unget_character() = 0; + virtual ~input_adapter_protocol() = default; +}; + +/// a type to simplify interfaces +using input_adapter_t = std::shared_ptr; + +/*! +Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at +beginning of input. Does not support changing the underlying std::streambuf +in mid-input. Maintains underlying std::istream and std::streambuf to support +subsequent use of standard std::istream operations to process any input +characters following those used in parsing the JSON input. Clears the +std::istream flags; any input errors (e.g., EOF) will be detected by the first +subsequent call for input from the std::istream. +*/ +class input_stream_adapter : public input_adapter_protocol +{ + public: + ~input_stream_adapter() override + { + // clear stream flags; we use underlying streambuf I/O, do not + // maintain ifstream flags + is.clear(); + } + + explicit input_stream_adapter(std::istream& i) + : is(i), sb(*i.rdbuf()) + { + // skip byte order mark + std::char_traits::int_type c; + if ((c = get_character()) == 0xEF) + { + if ((c = get_character()) == 0xBB) + { + if ((c = get_character()) == 0xBF) + { + return; // Ignore BOM + } + else if (c != std::char_traits::eof()) + { + is.unget(); + } + is.putback('\xBB'); + } + else if (c != std::char_traits::eof()) + { + is.unget(); + } + is.putback('\xEF'); + } + else if (c != std::char_traits::eof()) + { + is.unget(); // no byte order mark; process as usual + } + } + + // delete because of pointer members + input_stream_adapter(const input_stream_adapter&) = delete; + input_stream_adapter& operator=(input_stream_adapter&) = delete; + + // std::istream/std::streambuf use std::char_traits::to_int_type, to + // ensure that std::char_traits::eof() and the character 0xFF do not + // end up as the same value, eg. 0xFFFFFFFF. + std::char_traits::int_type get_character() override + { + return sb.sbumpc(); + } + + void unget_character() override + { + sb.sungetc(); // is.unget() avoided for performance + } + + private: + /// the associated input stream + std::istream& is; + std::streambuf& sb; +}; + +/// input adapter for buffer input +class input_buffer_adapter : public input_adapter_protocol +{ + public: + input_buffer_adapter(const char* b, const std::size_t l) + : cursor(b), limit(b + l), start(b) + { + // skip byte order mark + if (l >= 3 and b[0] == '\xEF' and b[1] == '\xBB' and b[2] == '\xBF') + { + cursor += 3; + } + } + + // delete because of pointer members + input_buffer_adapter(const input_buffer_adapter&) = delete; + input_buffer_adapter& operator=(input_buffer_adapter&) = delete; + + std::char_traits::int_type get_character() noexcept override + { + if (JSON_LIKELY(cursor < limit)) + { + return std::char_traits::to_int_type(*(cursor++)); + } + + return std::char_traits::eof(); + } + + void unget_character() noexcept override + { + if (JSON_LIKELY(cursor > start)) + { + --cursor; + } + } + + private: + /// pointer to the current character + const char* cursor; + /// pointer past the last character + const char* limit; + /// pointer to the first character + const char* start; +}; + +class input_adapter +{ + public: + // native support + + /// input adapter for input stream + input_adapter(std::istream& i) + : ia(std::make_shared(i)) {} + + /// input adapter for input stream + input_adapter(std::istream&& i) + : ia(std::make_shared(i)) {} + + /// input adapter for buffer + template::value and + std::is_integral::type>::value and + sizeof(typename std::remove_pointer::type) == 1, + int>::type = 0> + input_adapter(CharT b, std::size_t l) + : ia(std::make_shared(reinterpret_cast(b), l)) {} + + // derived support + + /// input adapter for string literal + template::value and + std::is_integral::type>::value and + sizeof(typename std::remove_pointer::type) == 1, + int>::type = 0> + input_adapter(CharT b) + : input_adapter(reinterpret_cast(b), + std::strlen(reinterpret_cast(b))) {} + + /// input adapter for iterator range with contiguous storage + template::iterator_category, std::random_access_iterator_tag>::value, + int>::type = 0> + input_adapter(IteratorType first, IteratorType last) + { + // assertion to check that the iterator range is indeed contiguous, + // see http://stackoverflow.com/a/35008842/266378 for more discussion + assert(std::accumulate( + first, last, std::pair(true, 0), + [&first](std::pair res, decltype(*first) val) + { + res.first &= (val == *(std::next(std::addressof(*first), res.second++))); + return res; + }).first); + + // assertion to check that each element is 1 byte long + static_assert( + sizeof(typename std::iterator_traits::value_type) == 1, + "each element in the iterator range must have the size of 1 byte"); + + const auto len = static_cast(std::distance(first, last)); + if (JSON_LIKELY(len > 0)) + { + // there is at least one element: use the address of first + ia = std::make_shared(reinterpret_cast(&(*first)), len); + } + else + { + // the address of first cannot be used: use nullptr + ia = std::make_shared(nullptr, len); + } + } + + /// input adapter for array + template + input_adapter(T (&array)[N]) + : input_adapter(std::begin(array), std::end(array)) {} + + /// input adapter for contiguous container + template::value and + std::is_base_of()))>::iterator_category>::value, + int>::type = 0> + input_adapter(const ContiguousContainer& c) + : input_adapter(std::begin(c), std::end(c)) {} + + operator input_adapter_t() + { + return ia; + } + + private: + /// the actual adapter + input_adapter_t ia = nullptr; +}; +} +} + +#endif diff --git a/src/json.hpp b/src/json.hpp index ae7d727e..f3fea0b8 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -57,6 +57,7 @@ SOFTWARE. #include "detail/value_t.hpp" #include "detail/conversions/from_json.hpp" #include "detail/conversions/to_json.hpp" +#include "detail/parsing/input_adapters.hpp" /*! @brief namespace for Niels Lohmann @@ -67,245 +68,6 @@ namespace nlohmann { namespace detail { -//////////////////// -// input adapters // -//////////////////// - -/*! -@brief abstract input adapter interface - -Produces a stream of std::char_traits::int_type characters from a -std::istream, a buffer, or some other input type. Accepts the return of exactly -one non-EOF character for future input. The int_type characters returned -consist of all valid char values as positive values (typically unsigned char), -plus an EOF value outside that range, specified by the value of the function -std::char_traits::eof(). This value is typically -1, but could be any -arbitrary value which is not a valid char value. -*/ -struct input_adapter_protocol -{ - /// get a character [0,255] or std::char_traits::eof(). - virtual std::char_traits::int_type get_character() = 0; - /// restore the last non-eof() character to input - virtual void unget_character() = 0; - virtual ~input_adapter_protocol() = default; -}; - -/// a type to simplify interfaces -using input_adapter_t = std::shared_ptr; - -/*! -Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at -beginning of input. Does not support changing the underlying std::streambuf -in mid-input. Maintains underlying std::istream and std::streambuf to support -subsequent use of standard std::istream operations to process any input -characters following those used in parsing the JSON input. Clears the -std::istream flags; any input errors (e.g., EOF) will be detected by the first -subsequent call for input from the std::istream. -*/ -class input_stream_adapter : public input_adapter_protocol -{ - public: - ~input_stream_adapter() override - { - // clear stream flags; we use underlying streambuf I/O, do not - // maintain ifstream flags - is.clear(); - } - - explicit input_stream_adapter(std::istream& i) - : is(i), sb(*i.rdbuf()) - { - // skip byte order mark - std::char_traits::int_type c; - if ((c = get_character()) == 0xEF) - { - if ((c = get_character()) == 0xBB) - { - if ((c = get_character()) == 0xBF) - { - return; // Ignore BOM - } - else if (c != std::char_traits::eof()) - { - is.unget(); - } - is.putback('\xBB'); - } - else if (c != std::char_traits::eof()) - { - is.unget(); - } - is.putback('\xEF'); - } - else if (c != std::char_traits::eof()) - { - is.unget(); // no byte order mark; process as usual - } - } - - // delete because of pointer members - input_stream_adapter(const input_stream_adapter&) = delete; - input_stream_adapter& operator=(input_stream_adapter&) = delete; - - // std::istream/std::streambuf use std::char_traits::to_int_type, to - // ensure that std::char_traits::eof() and the character 0xFF do not - // end up as the same value, eg. 0xFFFFFFFF. - std::char_traits::int_type get_character() override - { - return sb.sbumpc(); - } - - void unget_character() override - { - sb.sungetc(); // is.unget() avoided for performance - } - - private: - /// the associated input stream - std::istream& is; - std::streambuf& sb; -}; - -/// input adapter for buffer input -class input_buffer_adapter : public input_adapter_protocol -{ - public: - input_buffer_adapter(const char* b, const std::size_t l) - : cursor(b), limit(b + l), start(b) - { - // skip byte order mark - if (l >= 3 and b[0] == '\xEF' and b[1] == '\xBB' and b[2] == '\xBF') - { - cursor += 3; - } - } - - // delete because of pointer members - input_buffer_adapter(const input_buffer_adapter&) = delete; - input_buffer_adapter& operator=(input_buffer_adapter&) = delete; - - std::char_traits::int_type get_character() noexcept override - { - if (JSON_LIKELY(cursor < limit)) - { - return std::char_traits::to_int_type(*(cursor++)); - } - - return std::char_traits::eof(); - } - - void unget_character() noexcept override - { - if (JSON_LIKELY(cursor > start)) - { - --cursor; - } - } - - private: - /// pointer to the current character - const char* cursor; - /// pointer past the last character - const char* limit; - /// pointer to the first character - const char* start; -}; - -class input_adapter -{ - public: - // native support - - /// input adapter for input stream - input_adapter(std::istream& i) - : ia(std::make_shared(i)) {} - - /// input adapter for input stream - input_adapter(std::istream&& i) - : ia(std::make_shared(i)) {} - - /// input adapter for buffer - template::value and - std::is_integral::type>::value and - sizeof(typename std::remove_pointer::type) == 1, - int>::type = 0> - input_adapter(CharT b, std::size_t l) - : ia(std::make_shared(reinterpret_cast(b), l)) {} - - // derived support - - /// input adapter for string literal - template::value and - std::is_integral::type>::value and - sizeof(typename std::remove_pointer::type) == 1, - int>::type = 0> - input_adapter(CharT b) - : input_adapter(reinterpret_cast(b), - std::strlen(reinterpret_cast(b))) {} - - /// input adapter for iterator range with contiguous storage - template::iterator_category, std::random_access_iterator_tag>::value, - int>::type = 0> - input_adapter(IteratorType first, IteratorType last) - { - // assertion to check that the iterator range is indeed contiguous, - // see http://stackoverflow.com/a/35008842/266378 for more discussion - assert(std::accumulate( - first, last, std::pair(true, 0), - [&first](std::pair res, decltype(*first) val) - { - res.first &= (val == *(std::next(std::addressof(*first), res.second++))); - return res; - }).first); - - // assertion to check that each element is 1 byte long - static_assert( - sizeof(typename std::iterator_traits::value_type) == 1, - "each element in the iterator range must have the size of 1 byte"); - - const auto len = static_cast(std::distance(first, last)); - if (JSON_LIKELY(len > 0)) - { - // there is at least one element: use the address of first - ia = std::make_shared(reinterpret_cast(&(*first)), len); - } - else - { - // the address of first cannot be used: use nullptr - ia = std::make_shared(nullptr, len); - } - } - - /// input adapter for array - template - input_adapter(T (&array)[N]) - : input_adapter(std::begin(array), std::end(array)) {} - - /// input adapter for contiguous container - template::value and - std::is_base_of()))>::iterator_category>::value, - int>::type = 0> - input_adapter(const ContiguousContainer& c) - : input_adapter(std::begin(c), std::end(c)) {} - - operator input_adapter_t() - { - return ia; - } - - private: - /// the actual adapter - input_adapter_t ia = nullptr; -}; - ////////////////////// // lexer and parser // //////////////////////