Remove outdated commentary about the value of eof(), retain input type
o We assume the same character int_type as the underlying std::istream.
o There are no assumptions on the value of eof(), other than that it will not be a valid unsigned char value.
o To retain performance, we do not allow swapping out the underlying std::streambuf during our use of the std::istream for parsing.
This commit is contained in:
parent
45e1e3d48a
commit
23440eb86e
1 changed files with 33 additions and 16 deletions
49
src/json.hpp
49
src/json.hpp
|
@ -1394,10 +1394,22 @@ constexpr T static_const<T>::value;
|
||||||
// input adapters //
|
// input adapters //
|
||||||
////////////////////
|
////////////////////
|
||||||
|
|
||||||
/// abstract input adapter interface
|
/*!
|
||||||
|
@brief abstract input adapter interface
|
||||||
|
|
||||||
|
Produces a stream of std::char_traits<char>::int_type characters from a
|
||||||
|
std::istream, a buffer, or some other input type. Accepts the return of exactly
|
||||||
|
one non-EOF character for future input. The int_type characters returned
|
||||||
|
consist of all valid char values as positive values (typically unsigned char),
|
||||||
|
plus an EOF value outside that range, specified by the value of the function
|
||||||
|
std::char_traits<char>::eof(). This value is typically -1, but could be any
|
||||||
|
arbitrary value which is not a valid char value.
|
||||||
|
|
||||||
|
@return Typically [0,255] plus std::char_traits<char>::eof().
|
||||||
|
*/
|
||||||
struct input_adapter_protocol
|
struct input_adapter_protocol
|
||||||
{
|
{
|
||||||
virtual int get_character() = 0; // returns characters in range [0,255], or eof()
|
virtual std::char_traits<char>::int_type get_character() = 0;
|
||||||
virtual void unget_character() = 0; // restore the last non-eof() character to input
|
virtual void unget_character() = 0; // restore the last non-eof() character to input
|
||||||
virtual ~input_adapter_protocol() = default;
|
virtual ~input_adapter_protocol() = default;
|
||||||
};
|
};
|
||||||
|
@ -1405,8 +1417,13 @@ struct input_adapter_protocol
|
||||||
/// a type to simplify interfaces
|
/// a type to simplify interfaces
|
||||||
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
|
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
|
||||||
|
|
||||||
|
/// input adapter for a (caching) istream. Ignores a UTF Byte Order Mark at
|
||||||
/// input adapter for a (caching) istream. Ignores a UTF Byte Order Mark at beginning of input.
|
/// beginning of input. Does not support changing the underlying std::streambuf
|
||||||
|
/// in mid-input. Maintains underlying std::istream and std::streambuf to
|
||||||
|
/// support subsequent use of standard std::istream operations to process any
|
||||||
|
/// input characters following those used in parsing the JSON input. Clears the
|
||||||
|
/// std::istream flags; any input errors (eg. EOF) will be detected by the first
|
||||||
|
/// subsequent call for input from the std::istream.
|
||||||
class input_stream_adapter : public input_adapter_protocol
|
class input_stream_adapter : public input_adapter_protocol
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -1417,10 +1434,10 @@ class input_stream_adapter : public input_adapter_protocol
|
||||||
}
|
}
|
||||||
explicit input_stream_adapter(std::istream& i)
|
explicit input_stream_adapter(std::istream& i)
|
||||||
: is(i)
|
: is(i)
|
||||||
, sb(i.rdbuf())
|
, sb(*i.rdbuf())
|
||||||
{
|
{
|
||||||
// Ignore Byte Order Mark at start of input
|
// Ignore Byte Order Mark at start of input
|
||||||
int c;
|
std::char_traits<char>::int_type c;
|
||||||
if (( c = get_character() ) == 0xEF )
|
if (( c = get_character() ) == 0xEF )
|
||||||
{
|
{
|
||||||
if (( c = get_character() ) == 0xBB )
|
if (( c = get_character() ) == 0xBB )
|
||||||
|
@ -1454,20 +1471,20 @@ class input_stream_adapter : public input_adapter_protocol
|
||||||
// std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
|
// std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
|
||||||
// ensure that std::char_traits<char>::eof() and the character 0xff do not
|
// ensure that std::char_traits<char>::eof() and the character 0xff do not
|
||||||
// end up as the same value, eg. 0xffffffff.
|
// end up as the same value, eg. 0xffffffff.
|
||||||
int get_character() override
|
std::char_traits<char>::int_type get_character() override
|
||||||
{
|
{
|
||||||
return sb->sbumpc();
|
return sb.sbumpc();
|
||||||
}
|
}
|
||||||
|
|
||||||
void unget_character() override
|
void unget_character() override
|
||||||
{
|
{
|
||||||
sb->sungetc(); // Avoided for performance: is.unget();
|
sb.sungetc(); // Avoided for performance: is.unget();
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
|
|
||||||
/// the associated input stream
|
/// the associated input stream
|
||||||
std::istream& is;
|
std::istream& is;
|
||||||
std::streambuf *sb;
|
std::streambuf &sb;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// input adapter for buffer input
|
/// input adapter for buffer input
|
||||||
|
@ -1488,7 +1505,7 @@ class input_buffer_adapter : public input_adapter_protocol
|
||||||
input_buffer_adapter(const input_buffer_adapter&) = delete;
|
input_buffer_adapter(const input_buffer_adapter&) = delete;
|
||||||
input_buffer_adapter& operator=(input_buffer_adapter&) = delete;
|
input_buffer_adapter& operator=(input_buffer_adapter&) = delete;
|
||||||
|
|
||||||
int get_character() noexcept override
|
std::char_traits<char>::int_type get_character() noexcept override
|
||||||
{
|
{
|
||||||
if (JSON_LIKELY(cursor < limit))
|
if (JSON_LIKELY(cursor < limit))
|
||||||
{
|
{
|
||||||
|
@ -2664,13 +2681,13 @@ scan_number_done:
|
||||||
@brief get next character from the input
|
@brief get next character from the input
|
||||||
|
|
||||||
This function provides the interface to the used input adapter. It does
|
This function provides the interface to the used input adapter. It does
|
||||||
not throw in case the input reached EOF, but returns a -'ve valued
|
not throw in case the input reached EOF, but returns a
|
||||||
`std::char_traits<char>::eof()` in that case. Stores the scanned characters
|
`std::char_traits<char>::eof()` in that case. Stores the scanned characters
|
||||||
for use in error messages.
|
for use in error messages.
|
||||||
|
|
||||||
@return character read from the input
|
@return character read from the input
|
||||||
*/
|
*/
|
||||||
int get()
|
std::char_traits<char>::int_type get()
|
||||||
{
|
{
|
||||||
++chars_read;
|
++chars_read;
|
||||||
current = ia->get_character();
|
current = ia->get_character();
|
||||||
|
@ -2739,8 +2756,8 @@ scan_number_done:
|
||||||
}
|
}
|
||||||
|
|
||||||
/// return the last read token (for errors only). Will never contain EOF
|
/// return the last read token (for errors only). Will never contain EOF
|
||||||
/// (a -'ve value), because 255 may legitimately occur. May contain NUL, which
|
/// (an arbitrary value that is not a valid char value, often -1), because
|
||||||
/// should be escaped.
|
/// 255 may legitimately occur. May contain NUL, which should be escaped.
|
||||||
std::string get_token_string() const
|
std::string get_token_string() const
|
||||||
{
|
{
|
||||||
// escape control characters
|
// escape control characters
|
||||||
|
@ -2844,7 +2861,7 @@ scan_number_done:
|
||||||
detail::input_adapter_t ia = nullptr;
|
detail::input_adapter_t ia = nullptr;
|
||||||
|
|
||||||
/// the current character
|
/// the current character
|
||||||
int current = std::char_traits<char>::eof();
|
std::char_traits<char>::int_type current = std::char_traits<char>::eof();
|
||||||
|
|
||||||
/// the number of characters read
|
/// the number of characters read
|
||||||
std::size_t chars_read = 0;
|
std::size_t chars_read = 0;
|
||||||
|
|
Loading…
Reference in a new issue