Merge pull request #1950 from FrancoisChabot/issues/1457

templated input adapters
This commit is contained in:
Niels Lohmann 2020-05-14 07:52:02 +02:00 committed by GitHub
commit 0857140839
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 708 additions and 419 deletions

View file

@ -24,6 +24,20 @@ namespace nlohmann
{
namespace detail
{
/*!
@brief determine system byte order
@return true if and only if system's byte order is little endian
@note from https://stackoverflow.com/a/1001328/266378
*/
static bool little_endianess(int num = 1) noexcept
{
return *reinterpret_cast<char*>(&num) == 1;
}
///////////////////
// binary reader //
///////////////////
@ -31,7 +45,7 @@ namespace detail
/*!
@brief deserialization of CBOR, MessagePack, and UBJSON values
*/
template<typename BasicJsonType, typename SAX = json_sax_dom_parser<BasicJsonType>>
template<typename BasicJsonType, typename InputAdapterType, typename SAX = json_sax_dom_parser<BasicJsonType>>
class binary_reader
{
using number_integer_t = typename BasicJsonType::number_integer_t;
@ -47,10 +61,9 @@ class binary_reader
@param[in] adapter input adapter to read from
*/
explicit binary_reader(input_adapter_t adapter) : ia(std::move(adapter))
explicit binary_reader(InputAdapterType&& adapter) : ia(std::move(adapter))
{
(void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
assert(ia);
}
// make class move-only
@ -119,18 +132,6 @@ class binary_reader
return result;
}
/*!
@brief determine system byte order
@return true if and only if system's byte order is little endian
@note from https://stackoverflow.com/a/1001328/266378
*/
static constexpr bool little_endianess(int num = 1) noexcept
{
return *reinterpret_cast<char*>(&num) == 1;
}
private:
//////////
// BSON //
@ -2085,7 +2086,7 @@ class binary_reader
int get()
{
++chars_read;
return current = ia->get_character();
return current = ia.get_character();
}
/*!
@ -2273,7 +2274,7 @@ class binary_reader
private:
/// input adapter
input_adapter_t ia = nullptr;
InputAdapterType ia;
/// the current character
int current = std::char_traits<char>::eof();

View file

@ -27,32 +27,11 @@ enum class input_format_t { json, cbor, msgpack, ubjson, bson };
// input adapters //
////////////////////
/*!
@brief abstract input adapter interface
Produces a stream of std::char_traits<char>::int_type characters from a
std::istream, a buffer, or some other input type. Accepts the return of
exactly one non-EOF character for future input. The int_type characters
returned consist of all valid char values as positive values (typically
unsigned char), plus an EOF value outside that range, specified by the value
of the function std::char_traits<char>::eof(). This value is typically -1, but
could be any arbitrary value which is not a valid char value.
*/
struct input_adapter_protocol
{
/// get a character [0,255] or std::char_traits<char>::eof().
virtual std::char_traits<char>::int_type get_character() = 0;
virtual ~input_adapter_protocol() = default;
};
/// a type to simplify interfaces
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
/*!
Input adapter for stdio file access. This adapter read only 1 byte and do not use any
buffer. This adapter is a very low level adapter.
*/
class file_input_adapter : public input_adapter_protocol
class file_input_adapter
{
public:
JSON_HEDLEY_NON_NULL(2)
@ -64,10 +43,9 @@ class file_input_adapter : public input_adapter_protocol
file_input_adapter(const file_input_adapter&) = delete;
file_input_adapter(file_input_adapter&&) = default;
file_input_adapter& operator=(const file_input_adapter&) = delete;
file_input_adapter& operator=(file_input_adapter&&) = default;
~file_input_adapter() override = default;
file_input_adapter& operator=(file_input_adapter&&) = delete;
std::char_traits<char>::int_type get_character() noexcept override
std::char_traits<char>::int_type get_character() noexcept
{
return std::fgetc(m_file);
}
@ -87,48 +65,56 @@ characters following those used in parsing the JSON input. Clears the
std::istream flags; any input errors (e.g., EOF) will be detected by the first
subsequent call for input from the std::istream.
*/
class input_stream_adapter : public input_adapter_protocol
class input_stream_adapter
{
public:
~input_stream_adapter() override
~input_stream_adapter()
{
// clear stream flags; we use underlying streambuf I/O, do not
// maintain ifstream flags, except eof
is.clear(is.rdstate() & std::ios::eofbit);
if (is)
{
is->clear(is->rdstate() & std::ios::eofbit);
}
}
explicit input_stream_adapter(std::istream& i)
: is(i), sb(*i.rdbuf())
: is(&i), sb(i.rdbuf())
{}
// delete because of pointer members
input_stream_adapter(const input_stream_adapter&) = delete;
input_stream_adapter& operator=(input_stream_adapter&) = delete;
input_stream_adapter(input_stream_adapter&&) = delete;
input_stream_adapter& operator=(input_stream_adapter&&) = delete;
input_stream_adapter& operator=(input_stream_adapter&& rhs) = delete;
input_stream_adapter(input_stream_adapter&& rhs) : is(rhs.is), sb(rhs.sb)
{
rhs.is = nullptr;
rhs.sb = nullptr;
}
// std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
// ensure that std::char_traits<char>::eof() and the character 0xFF do not
// end up as the same value, eg. 0xFFFFFFFF.
std::char_traits<char>::int_type get_character() override
std::char_traits<char>::int_type get_character()
{
auto res = sb.sbumpc();
auto res = sb->sbumpc();
// set eof manually, as we don't use the istream interface.
if (res == EOF)
{
is.clear(is.rdstate() | std::ios::eofbit);
is->clear(is->rdstate() | std::ios::eofbit);
}
return res;
}
private:
/// the associated input stream
std::istream& is;
std::streambuf& sb;
std::istream* is = nullptr;
std::streambuf* sb = nullptr;
};
/// input adapter for buffer input
class input_buffer_adapter : public input_adapter_protocol
class input_buffer_adapter
{
public:
input_buffer_adapter(const char* b, const std::size_t l) noexcept
@ -138,11 +124,10 @@ class input_buffer_adapter : public input_adapter_protocol
// delete because of pointer members
input_buffer_adapter(const input_buffer_adapter&) = delete;
input_buffer_adapter& operator=(input_buffer_adapter&) = delete;
input_buffer_adapter(input_buffer_adapter&&) = delete;
input_buffer_adapter(input_buffer_adapter&&) = default;
input_buffer_adapter& operator=(input_buffer_adapter&&) = delete;
~input_buffer_adapter() override = default;
std::char_traits<char>::int_type get_character() noexcept override
std::char_traits<char>::int_type get_character() noexcept
{
if (JSON_HEDLEY_LIKELY(cursor < limit))
{
@ -285,14 +270,14 @@ struct wide_string_input_helper<WideStringType, 2>
};
template<typename WideStringType>
class wide_string_input_adapter : public input_adapter_protocol
class wide_string_input_adapter
{
public:
explicit wide_string_input_adapter(const WideStringType& w) noexcept
: str(w)
{}
std::char_traits<char>::int_type get_character() noexcept override
std::char_traits<char>::int_type get_character() noexcept
{
// check if buffer needs to be filled
if (utf8_bytes_index == utf8_bytes_filled)
@ -331,112 +316,164 @@ class wide_string_input_adapter : public input_adapter_protocol
std::size_t utf8_bytes_filled = 0;
};
class input_adapter
inline file_input_adapter input_adapter(std::FILE* file)
{
return file_input_adapter(file);
}
inline input_stream_adapter input_adapter(std::istream& stream)
{
return input_stream_adapter(stream);
}
inline input_stream_adapter input_adapter(std::istream&& stream)
{
return input_stream_adapter(stream);
}
template<typename CharT,
typename std::enable_if<
std::is_pointer<CharT>::value and
std::is_integral<typename std::remove_pointer<CharT>::type>::value and
sizeof(typename std::remove_pointer<CharT>::type) == 1,
int>::type = 0>
input_buffer_adapter input_adapter(CharT b, std::size_t l)
{
return input_buffer_adapter(reinterpret_cast<const char*>(b), l);
}
template<typename CharT,
typename std::enable_if<
std::is_pointer<CharT>::value and
std::is_integral<typename std::remove_pointer<CharT>::type>::value and
sizeof(typename std::remove_pointer<CharT>::type) == 1,
int>::type = 0>
input_buffer_adapter input_adapter(CharT b)
{
return input_adapter(reinterpret_cast<const char*>(b),
std::strlen(reinterpret_cast<const char*>(b)));
}
template<class IteratorType,
typename std::enable_if<
std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
int>::type = 0>
input_buffer_adapter input_adapter(IteratorType first, IteratorType last)
{
#ifndef NDEBUG
// assertion to check that the iterator range is indeed contiguous,
// see https://stackoverflow.com/a/35008842/266378 for more discussion
const auto is_contiguous = std::accumulate(
first, last, std::pair<bool, int>(true, 0),
[&first](std::pair<bool, int> res, decltype(*first) val)
{
res.first &= (val == *(std::next(std::addressof(*first), res.second++)));
return res;
}).first;
assert(is_contiguous);
#endif
// assertion to check that each element is 1 byte long
static_assert(
sizeof(typename iterator_traits<IteratorType>::value_type) == 1,
"each element in the iterator range must have the size of 1 byte");
const auto len = static_cast<size_t>(std::distance(first, last));
if (JSON_HEDLEY_LIKELY(len > 0))
{
// there is at least one element: use the address of first
return input_buffer_adapter(reinterpret_cast<const char*>(&(*first)), len);
}
else
{
// the address of first cannot be used: use nullptr
return input_buffer_adapter(nullptr, len);
}
}
inline wide_string_input_adapter<std::wstring> input_adapter(const std::wstring& ws)
{
return wide_string_input_adapter<std::wstring>(ws);
}
inline wide_string_input_adapter<std::u16string> input_adapter(const std::u16string& ws)
{
return wide_string_input_adapter<std::u16string>(ws);
}
inline wide_string_input_adapter<std::u32string> input_adapter(const std::u32string& ws)
{
return wide_string_input_adapter<std::u32string>(ws);
}
template<class ContiguousContainer, typename
std::enable_if<not std::is_pointer<ContiguousContainer>::value and
std::is_base_of<std::random_access_iterator_tag, typename iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value,
int>::type = 0>
input_buffer_adapter input_adapter(const ContiguousContainer& c)
{
return input_adapter(std::begin(c), std::end(c));
}
template<class T, std::size_t N>
input_buffer_adapter input_adapter(T (&array)[N])
{
return input_adapter(std::begin(array), std::end(array));
}
// This class only handles inputs of input_buffer_adapter type.
// It's required so that expressions like {ptr, len} can be implicitely casted
// to the correct adapter.
class span_input_adapter
{
public:
// native support
JSON_HEDLEY_NON_NULL(2)
input_adapter(std::FILE* file)
: ia(std::make_shared<file_input_adapter>(file)) {}
/// input adapter for input stream
input_adapter(std::istream& i)
: ia(std::make_shared<input_stream_adapter>(i)) {}
/// input adapter for input stream
input_adapter(std::istream&& i)
: ia(std::make_shared<input_stream_adapter>(i)) {}
input_adapter(const std::wstring& ws)
: ia(std::make_shared<wide_string_input_adapter<std::wstring>>(ws)) {}
input_adapter(const std::u16string& ws)
: ia(std::make_shared<wide_string_input_adapter<std::u16string>>(ws)) {}
input_adapter(const std::u32string& ws)
: ia(std::make_shared<wide_string_input_adapter<std::u32string>>(ws)) {}
/// input adapter for buffer
template<typename CharT,
typename std::enable_if<
std::is_pointer<CharT>::value and
std::is_integral<typename std::remove_pointer<CharT>::type>::value and
sizeof(typename std::remove_pointer<CharT>::type) == 1,
int>::type = 0>
input_adapter(CharT b, std::size_t l)
: ia(std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(b), l)) {}
span_input_adapter(CharT b, std::size_t l)
: ia(reinterpret_cast<const char*>(b), l) {}
// derived support
/// input adapter for string literal
template<typename CharT,
typename std::enable_if<
std::is_pointer<CharT>::value and
std::is_integral<typename std::remove_pointer<CharT>::type>::value and
sizeof(typename std::remove_pointer<CharT>::type) == 1,
int>::type = 0>
input_adapter(CharT b)
: input_adapter(reinterpret_cast<const char*>(b),
std::strlen(reinterpret_cast<const char*>(b))) {}
span_input_adapter(CharT b)
: span_input_adapter(reinterpret_cast<const char*>(b),
std::strlen(reinterpret_cast<const char*>(b))) {}
/// input adapter for iterator range with contiguous storage
template<class IteratorType,
typename std::enable_if<
std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
int>::type = 0>
input_adapter(IteratorType first, IteratorType last)
{
#ifndef NDEBUG
// assertion to check that the iterator range is indeed contiguous,
// see https://stackoverflow.com/a/35008842/266378 for more discussion
const auto is_contiguous = std::accumulate(
first, last, std::pair<bool, int>(true, 0),
[&first](std::pair<bool, int> res, decltype(*first) val)
{
res.first &= (val == *(std::next(std::addressof(*first), res.second++)));
return res;
}).first;
assert(is_contiguous);
#endif
span_input_adapter(IteratorType first, IteratorType last)
: ia(input_adapter(first, last)) {}
// assertion to check that each element is 1 byte long
static_assert(
sizeof(typename iterator_traits<IteratorType>::value_type) == 1,
"each element in the iterator range must have the size of 1 byte");
const auto len = static_cast<size_t>(std::distance(first, last));
if (JSON_HEDLEY_LIKELY(len > 0))
{
// there is at least one element: use the address of first
ia = std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(&(*first)), len);
}
else
{
// the address of first cannot be used: use nullptr
ia = std::make_shared<input_buffer_adapter>(nullptr, len);
}
}
/// input adapter for array
template<class T, std::size_t N>
input_adapter(T (&array)[N])
: input_adapter(std::begin(array), std::end(array)) {}
span_input_adapter(T (&array)[N])
: span_input_adapter(std::begin(array), std::end(array)) {}
/// input adapter for contiguous container
template<class ContiguousContainer, typename
std::enable_if<not std::is_pointer<ContiguousContainer>::value and
std::is_base_of<std::random_access_iterator_tag, typename iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value,
int>::type = 0>
input_adapter(const ContiguousContainer& c)
: input_adapter(std::begin(c), std::end(c)) {}
span_input_adapter(const ContiguousContainer& c)
: span_input_adapter(std::begin(c), std::end(c)) {}
operator input_adapter_t()
input_buffer_adapter&& get()
{
return ia;
return std::move(ia);
}
private:
/// the actual adapter
input_adapter_t ia = nullptr;
input_buffer_adapter ia;
};
} // namespace detail
} // namespace nlohmann

View file

@ -22,19 +22,9 @@ namespace detail
// lexer //
///////////
/*!
@brief lexical analysis
This class organizes the lexical analysis during JSON deserialization.
*/
template<typename BasicJsonType>
class lexer
class lexer_base
{
using number_integer_t = typename BasicJsonType::number_integer_t;
using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
using number_float_t = typename BasicJsonType::number_float_t;
using string_t = typename BasicJsonType::string_t;
public:
/// token types for the parser
enum class token_type
@ -75,9 +65,9 @@ class lexer
return "null literal";
case token_type::value_string:
return "string literal";
case lexer::token_type::value_unsigned:
case lexer::token_type::value_integer:
case lexer::token_type::value_float:
case token_type::value_unsigned:
case token_type::value_integer:
case token_type::value_float:
return "number literal";
case token_type::begin_array:
return "'['";
@ -103,15 +93,31 @@ class lexer
// LCOV_EXCL_STOP
}
}
};
/*!
@brief lexical analysis
explicit lexer(detail::input_adapter_t&& adapter)
This class organizes the lexical analysis during JSON deserialization.
*/
template<typename BasicJsonType, typename InputAdapterType>
class lexer : public lexer_base<BasicJsonType>
{
using number_integer_t = typename BasicJsonType::number_integer_t;
using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
using number_float_t = typename BasicJsonType::number_float_t;
using string_t = typename BasicJsonType::string_t;
public:
using token_type = typename lexer_base<BasicJsonType>::token_type;
explicit lexer(InputAdapterType&& adapter)
: ia(std::move(adapter)), decimal_point_char(get_decimal_point()) {}
// delete because of pointer members
lexer(const lexer&) = delete;
lexer(lexer&&) = delete;
lexer(lexer&&) = default;
lexer& operator=(lexer&) = delete;
lexer& operator=(lexer&&) = delete;
lexer& operator=(lexer&&) = default;
~lexer() = default;
private:
@ -1256,7 +1262,7 @@ scan_number_done:
}
else
{
current = ia->get_character();
current = ia.get_character();
}
if (JSON_HEDLEY_LIKELY(current != std::char_traits<char>::eof()))
@ -1480,7 +1486,7 @@ scan_number_done:
private:
/// input adapter
detail::input_adapter_t ia = nullptr;
InputAdapterType ia;
/// the current character
std::char_traits<char>::int_type current = std::char_traits<char>::eof();

View file

@ -24,44 +24,45 @@ namespace detail
// parser //
////////////
enum class parse_event_t : uint8_t
{
/// the parser read `{` and started to process a JSON object
object_start,
/// the parser read `}` and finished processing a JSON object
object_end,
/// the parser read `[` and started to process a JSON array
array_start,
/// the parser read `]` and finished processing a JSON array
array_end,
/// the parser read a key of a value in an object
key,
/// the parser finished reading a JSON value
value
};
template<typename BasicJsonType>
using parser_callback_t =
std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>;
/*!
@brief syntax analysis
This class implements a recursive descent parser.
*/
template<typename BasicJsonType>
template<typename BasicJsonType, typename InputAdapterType>
class parser
{
using number_integer_t = typename BasicJsonType::number_integer_t;
using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
using number_float_t = typename BasicJsonType::number_float_t;
using string_t = typename BasicJsonType::string_t;
using lexer_t = lexer<BasicJsonType>;
using lexer_t = lexer<BasicJsonType, InputAdapterType>;
using token_type = typename lexer_t::token_type;
public:
enum class parse_event_t : uint8_t
{
/// the parser read `{` and started to process a JSON object
object_start,
/// the parser read `}` and finished processing a JSON object
object_end,
/// the parser read `[` and started to process a JSON array
array_start,
/// the parser read `]` and finished processing a JSON array
array_end,
/// the parser read a key of a value in an object
key,
/// the parser finished reading a JSON value
value
};
using parser_callback_t =
std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>;
/// a parser reading from an input adapter
explicit parser(detail::input_adapter_t&& adapter,
const parser_callback_t cb = nullptr,
explicit parser(InputAdapterType&& adapter,
const parser_callback_t<BasicJsonType> cb = nullptr,
const bool allow_exceptions_ = true)
: callback(cb), m_lexer(std::move(adapter)), allow_exceptions(allow_exceptions_)
{
@ -486,7 +487,7 @@ class parser
private:
/// callback function
const parser_callback_t callback = nullptr;
const parser_callback_t<BasicJsonType> callback = nullptr;
/// the type of the last read token
token_type last_token = token_type::uninitialized;
/// the lexer

View file

@ -1538,7 +1538,7 @@ class binary_writer
private:
/// whether we can assume little endianess
const bool is_little_endian = binary_reader<BasicJsonType>::little_endianess();
const bool is_little_endian = little_endianess();
/// the output
output_adapter_t<CharType> oa = nullptr;