ignore comments

This commit is contained in:
Niels Lohmann 2020-06-16 12:28:59 +02:00
parent e7452d8778
commit b53c6e2f81
No known key found for this signature in database
GPG key ID: 7F3CEA63AE251B69
3 changed files with 190 additions and 6 deletions

View file

@ -112,8 +112,11 @@ class lexer : public lexer_base<BasicJsonType>
public:
using token_type = typename lexer_base<BasicJsonType>::token_type;
/// create a lexer that reads from the given input adapter (moved into `ia`)
// NOTE(review): this one-argument form and the two-argument form below can both
// be called with a single adapter; they look like the before/after lines of one
// change -- confirm that only one of them is kept
explicit lexer(InputAdapterType&& adapter)
: ia(std::move(adapter)), decimal_point_char(static_cast<char_int_type>(get_decimal_point())) {}
/*!
@brief create a lexer that reads from the given input adapter
@param[in] adapter  input adapter; moved into the member `ia`
@param[in] ignore_comments_  stored in the member `ignore_comments`, which
                             scan() checks before calling scan_comment();
                             defaults to false
*/
explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false)
: ia(std::move(adapter))
, ignore_comments(ignore_comments_)
// cache the decimal point character reported by get_decimal_point()
, decimal_point_char(static_cast<char_int_type>(get_decimal_point()))
{}
// delete because of pointer members
lexer(const lexer&) = delete;
@ -131,7 +134,7 @@ class lexer : public lexer_base<BasicJsonType>
/*!
@brief return the decimal point character reported by localeconv()
@return the first character of `localeconv()->decimal_point`, or '.' if that
        pointer is null
*/
JSON_HEDLEY_PURE
static char get_decimal_point() noexcept
{
// NOTE(review): `loc` is declared on the next two lines (once with `auto`,
// once with `auto*`); they look like the before/after lines of one change --
// confirm that only one declaration is kept
const auto loc = localeconv();
const auto* loc = localeconv();
// localeconv() is expected to always return a valid pointer
assert(loc != nullptr);
// fall back to '.' if no decimal point string is provided
return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point);
}
@ -826,6 +829,71 @@ class lexer : public lexer_base<BasicJsonType>
}
}
/*!
@brief skip over a comment whose leading '/' has already been read

The next character decides the comment type: a second '/' starts a
single-line comment, which ends at a line break or at the end of the input;
a '*' starts a multi-line comment, which only ends at a '*' that is directly
followed by a '/'.

@return true if a complete comment was consumed; false if the character after
        the leading '/' is neither '/' nor '*', or if the input ends while a
        multi-line comment is still open
*/
bool scan_comment()
{
    // the character right after the leading '/' selects the comment type
    const auto kind = get();

    if (kind == '/')
    {
        // single-line comment: consume input up to and including the line
        // break; running out of input is a valid way to end it
        for (;;)
        {
            const auto c = get();
            if (c == '\n' or c == '\r' or c == '\0' or c == std::char_traits<char_type>::eof())
            {
                return true;
            }
        }
    }

    if (kind == '*')
    {
        // multi-line comment: consume input until the closing sequence
        for (;;)
        {
            const auto c = get();

            // the input ended before the comment was closed
            if (c == std::char_traits<char_type>::eof() or c == '\0')
            {
                return false;
            }

            if (c == '*')
            {
                if (get() == '/')
                {
                    return true;
                }

                // not the closing sequence: put the character back so it is
                // examined again (it may itself be a '*')
                unget();
            }
        }
    }

    // anything else after the leading '/' does not start a comment
    return false;
}
JSON_HEDLEY_NON_NULL(2)
static void strtof(float& f, const char* str, char** endptr) noexcept
{
@ -1431,6 +1499,17 @@ scan_number_done:
}
while (current == ' ' or current == '\t' or current == '\n' or current == '\r');
// ignore comments
if (ignore_comments and current == '/')
{
if (not scan_comment())
{
error_message = "invalid comment";
return token_type::parse_error;
}
get();
}
switch (current)
{
// structural characters
@ -1499,6 +1578,9 @@ scan_number_done:
/// input adapter
InputAdapterType ia;
/// whether comments should be ignored (true) or signaled as errors (false)
const bool ignore_comments = false;
/// the current character
char_int_type current = std::char_traits<char_type>::eof();

View file

@ -8179,8 +8179,11 @@ class lexer : public lexer_base<BasicJsonType>
public:
using token_type = typename lexer_base<BasicJsonType>::token_type;
/// create a lexer that reads from the given input adapter (moved into `ia`)
// NOTE(review): this one-argument form and the two-argument form below can both
// be called with a single adapter; they look like the before/after lines of one
// change -- confirm that only one of them is kept
explicit lexer(InputAdapterType&& adapter)
: ia(std::move(adapter)), decimal_point_char(static_cast<char_int_type>(get_decimal_point())) {}
/*!
@brief create a lexer that reads from the given input adapter
@param[in] adapter  input adapter; moved into the member `ia`
@param[in] ignore_comments_  stored in the member `ignore_comments`, which
                             scan() checks before calling scan_comment();
                             defaults to false
*/
explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false)
: ia(std::move(adapter))
, ignore_comments(ignore_comments_)
// cache the decimal point character reported by get_decimal_point()
, decimal_point_char(static_cast<char_int_type>(get_decimal_point()))
{}
// delete because of pointer members
lexer(const lexer&) = delete;
@ -8198,7 +8201,7 @@ class lexer : public lexer_base<BasicJsonType>
/*!
@brief return the decimal point character reported by localeconv()
@return the first character of `localeconv()->decimal_point`, or '.' if that
        pointer is null
*/
JSON_HEDLEY_PURE
static char get_decimal_point() noexcept
{
// NOTE(review): `loc` is declared on the next two lines (once with `auto`,
// once with `auto*`); they look like the before/after lines of one change --
// confirm that only one declaration is kept
const auto loc = localeconv();
const auto* loc = localeconv();
// localeconv() is expected to always return a valid pointer
assert(loc != nullptr);
// fall back to '.' if no decimal point string is provided
return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point);
}
@ -8893,6 +8896,71 @@ class lexer : public lexer_base<BasicJsonType>
}
}
/*!
@brief skip over a comment whose leading '/' has already been read

The next character decides the comment type: a second '/' starts a
single-line comment, which ends at a line break or at the end of the input;
a '*' starts a multi-line comment, which only ends at a '*' that is directly
followed by a '/'.

@return true if a complete comment was consumed; false if the character after
        the leading '/' is neither '/' nor '*', or if the input ends while a
        multi-line comment is still open
*/
bool scan_comment()
{
    // the character right after the leading '/' selects the comment type
    const auto kind = get();

    if (kind == '/')
    {
        // single-line comment: consume input up to and including the line
        // break; running out of input is a valid way to end it
        for (;;)
        {
            const auto c = get();
            if (c == '\n' or c == '\r' or c == '\0' or c == std::char_traits<char_type>::eof())
            {
                return true;
            }
        }
    }

    if (kind == '*')
    {
        // multi-line comment: consume input until the closing sequence
        for (;;)
        {
            const auto c = get();

            // the input ended before the comment was closed
            if (c == std::char_traits<char_type>::eof() or c == '\0')
            {
                return false;
            }

            if (c == '*')
            {
                if (get() == '/')
                {
                    return true;
                }

                // not the closing sequence: put the character back so it is
                // examined again (it may itself be a '*')
                unget();
            }
        }
    }

    // anything else after the leading '/' does not start a comment
    return false;
}
JSON_HEDLEY_NON_NULL(2)
static void strtof(float& f, const char* str, char** endptr) noexcept
{
@ -9498,6 +9566,17 @@ scan_number_done:
}
while (current == ' ' or current == '\t' or current == '\n' or current == '\r');
// ignore comments
if (ignore_comments and current == '/')
{
if (not scan_comment())
{
error_message = "invalid comment";
return token_type::parse_error;
}
get();
}
switch (current)
{
// structural characters
@ -9566,6 +9645,9 @@ scan_number_done:
/// input adapter
InputAdapterType ia;
/// whether comments should be ignored (true) or signaled as errors (false)
const bool ignore_comments = false;
/// the current character
char_int_type current = std::char_traits<char_type>::eof();

View file

@ -127,6 +127,8 @@ TEST_CASE("lexer class")
// store scan() result
const auto res = scan_string(s.c_str());
CAPTURE(s);
switch (c)
{
// single characters that are valid tokens
@ -161,6 +163,9 @@ TEST_CASE("lexer class")
break;
}
// case ('/'):
// break;
// anything else is not expected
default:
{
@ -179,4 +184,19 @@ TEST_CASE("lexer class")
s += "\"";
CHECK((scan_string(s.c_str()) == json::lexer::token_type::value_string));
}
// SECTION("ignore comments")
// {
// CHECK((scan_string("/") == json::lexer::token_type::parse_error));
//
// CHECK((scan_string("/!") == json::lexer::token_type::parse_error));
// CHECK((scan_string("/*") == json::lexer::token_type::parse_error));
// CHECK((scan_string("/**") == json::lexer::token_type::parse_error));
//
// CHECK((scan_string("//") == json::lexer::token_type::end_of_input));
// CHECK((scan_string("/**/") == json::lexer::token_type::end_of_input));
// CHECK((scan_string("/** /") == json::lexer::token_type::parse_error));
//
// CHECK((scan_string("/***/") == json::lexer::token_type::end_of_input));
// }
}