🔨 removing unget_character() function from input adapters #834
This commit is contained in:
parent
ba6edd5634
commit
aa89c5e048
4 changed files with 168 additions and 142 deletions
|
|
@ -1081,7 +1081,16 @@ scan_number_done:
|
|||
std::char_traits<char>::int_type get()
|
||||
{
|
||||
++chars_read;
|
||||
current = ia->get_character();
|
||||
if (next_unget)
|
||||
{
|
||||
// just reset the next_unget variable and work with current
|
||||
next_unget = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
current = ia->get_character();
|
||||
}
|
||||
|
||||
if (JSON_LIKELY(current != std::char_traits<char>::eof()))
|
||||
{
|
||||
token_string.push_back(std::char_traits<char>::to_char_type(current));
|
||||
|
|
@ -1089,13 +1098,20 @@ scan_number_done:
|
|||
return current;
|
||||
}
|
||||
|
||||
/// unget current character (return it again on next get)
|
||||
/*!
|
||||
@brief unget current character (read it again on next get)
|
||||
|
||||
We implement unget by setting variable next_unget to true. The input is not
|
||||
changed - we just simulate ungetting by modifying chars_read and
|
||||
token_string. The next call to get() will behave as if the unget character
|
||||
is read again.
|
||||
*/
|
||||
void unget()
|
||||
{
|
||||
next_unget = true;
|
||||
--chars_read;
|
||||
if (JSON_LIKELY(current != std::char_traits<char>::eof()))
|
||||
{
|
||||
ia->unget_character();
|
||||
assert(token_string.size() != 0);
|
||||
token_string.pop_back();
|
||||
}
|
||||
|
|
@ -1183,8 +1199,43 @@ scan_number_done:
|
|||
// actual scanner
|
||||
/////////////////////
|
||||
|
||||
/*!
|
||||
@brief skip the UTF-8 byte order mark
|
||||
@return true iff there is no BOM or the correct BOM has been skipped
|
||||
*/
|
||||
bool skip_bom()
|
||||
{
|
||||
if (get() == 0xEF)
|
||||
{
|
||||
if (get() == 0xBB and get() == 0xBF)
|
||||
{
|
||||
// we completely parsed the BOM
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// after reading 0xEF, an unexpected character followed
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// the first character is not the beginning of the BOM; unget it to
|
||||
// process is later
|
||||
unget();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
token_type scan()
|
||||
{
|
||||
// initially, skip the BOM
|
||||
if (chars_read == 0 and not skip_bom())
|
||||
{
|
||||
error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
// read next character and ignore whitespace
|
||||
do
|
||||
{
|
||||
|
|
@ -1254,6 +1305,9 @@ scan_number_done:
|
|||
/// the current character
|
||||
std::char_traits<char>::int_type current = std::char_traits<char>::eof();
|
||||
|
||||
/// whether the next get() call should just return current
|
||||
bool next_unget = false;
|
||||
|
||||
/// the number of characters read
|
||||
std::size_t chars_read = 0;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue