+ moved lexer to class
This commit is contained in:
parent
e845cd1db8
commit
8a4e127a57
3 changed files with 1010 additions and 1101 deletions
1339
src/json.hpp
1339
src/json.hpp
File diff suppressed because it is too large
Load diff
|
@ -14,6 +14,7 @@
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
- ObjectType trick from http://stackoverflow.com/a/9860911
|
- ObjectType trick from http://stackoverflow.com/a/9860911
|
||||||
|
@ -2384,9 +2385,9 @@ class basic_json
|
||||||
// parser //
|
// parser //
|
||||||
////////////
|
////////////
|
||||||
|
|
||||||
class parser
|
class lexer
|
||||||
{
|
{
|
||||||
private:
|
public:
|
||||||
/// token types for the parser
|
/// token types for the parser
|
||||||
enum class token_type
|
enum class token_type
|
||||||
{
|
{
|
||||||
|
@ -2406,238 +2407,40 @@ class basic_json
|
||||||
end_of_input
|
end_of_input
|
||||||
};
|
};
|
||||||
|
|
||||||
/// the type of a lexer character
|
inline lexer(const char* s) : m_content(s)
|
||||||
using lexer_char_t = unsigned char;
|
|
||||||
|
|
||||||
public:
|
|
||||||
/// constructor for strings
|
|
||||||
inline parser(const std::string& s) : buffer(s)
|
|
||||||
{
|
{
|
||||||
// set buffer for RE2C
|
m_start = m_cursor = m_content;
|
||||||
m_cursor = reinterpret_cast<const lexer_char_t*>(buffer.c_str());
|
m_limit = m_content + strlen(m_content);
|
||||||
// set a pointer past the end of the buffer
|
|
||||||
m_limit = m_cursor + buffer.size();
|
|
||||||
// read first token
|
|
||||||
get_token();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// a parser reading from an input stream
|
inline lexer() = default;
|
||||||
inline parser(std::istream& _is)
|
|
||||||
|
/*!max:re2c */
|
||||||
|
|
||||||
|
inline token_type scan()
|
||||||
{
|
{
|
||||||
while (_is)
|
#define YYFILL(n)
|
||||||
{
|
|
||||||
std::string input_line;
|
|
||||||
std::getline(_is, input_line);
|
|
||||||
buffer += input_line;
|
|
||||||
}
|
|
||||||
|
|
||||||
// set buffer for RE2C
|
|
||||||
m_cursor = reinterpret_cast<const lexer_char_t*>(buffer.c_str());
|
|
||||||
// set a pointer past the end of the buffer
|
|
||||||
m_limit = m_cursor + buffer.size();
|
|
||||||
// read first token
|
|
||||||
get_token();
|
|
||||||
}
|
|
||||||
|
|
||||||
inline basic_json parse()
|
|
||||||
{
|
|
||||||
switch (last_token)
|
|
||||||
{
|
|
||||||
case (token_type::begin_object):
|
|
||||||
{
|
|
||||||
// explicitly set result to object to cope with {}
|
|
||||||
basic_json result(value_t::object);
|
|
||||||
|
|
||||||
// read next token
|
|
||||||
get_token();
|
|
||||||
|
|
||||||
// closing } -> we are done
|
|
||||||
if (last_token == token_type::end_object)
|
|
||||||
{
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// otherwise: parse key-value pairs
|
|
||||||
do
|
|
||||||
{
|
|
||||||
// store key
|
|
||||||
expect_new(token_type::value_string);
|
|
||||||
const auto key = get_string();
|
|
||||||
|
|
||||||
// parse separator (:)
|
|
||||||
get_token();
|
|
||||||
expect_new(token_type::name_separator);
|
|
||||||
|
|
||||||
// parse value
|
|
||||||
get_token();
|
|
||||||
result[key] = parse();
|
|
||||||
|
|
||||||
// read next character
|
|
||||||
get_token();
|
|
||||||
}
|
|
||||||
while (last_token == token_type::value_separator
|
|
||||||
and get_token() == last_token);
|
|
||||||
|
|
||||||
// closing }
|
|
||||||
expect_new(token_type::end_object);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
case (token_type::begin_array):
|
|
||||||
{
|
|
||||||
// explicitly set result to array to cope with []
|
|
||||||
basic_json result(value_t::array);
|
|
||||||
|
|
||||||
// read next token
|
|
||||||
get_token();
|
|
||||||
|
|
||||||
// closing ] -> we are done
|
|
||||||
if (last_token == token_type::end_array)
|
|
||||||
{
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// otherwise: parse values
|
|
||||||
do
|
|
||||||
{
|
|
||||||
// parse value
|
|
||||||
result.push_back(parse());
|
|
||||||
|
|
||||||
// read next character
|
|
||||||
get_token();
|
|
||||||
}
|
|
||||||
while (last_token == token_type::value_separator
|
|
||||||
and get_token() == last_token);
|
|
||||||
|
|
||||||
// closing ]
|
|
||||||
expect_new(token_type::end_array);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
case (token_type::literal_null):
|
|
||||||
{
|
|
||||||
return basic_json(nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
case (token_type::value_string):
|
|
||||||
{
|
|
||||||
return basic_json(get_string());
|
|
||||||
}
|
|
||||||
|
|
||||||
case (token_type::literal_true):
|
|
||||||
{
|
|
||||||
return basic_json(true);
|
|
||||||
}
|
|
||||||
|
|
||||||
case (token_type::literal_false):
|
|
||||||
{
|
|
||||||
return basic_json(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
case (token_type::value_number):
|
|
||||||
{
|
|
||||||
// The pointer m_begin points to the beginning of the
|
|
||||||
// parsed number. We pass this pointer to std::strtod which
|
|
||||||
// sets endptr to the first character past the converted
|
|
||||||
// number. If this pointer is not the same as m_cursor,
|
|
||||||
// then either more or less characters have been used
|
|
||||||
// during the comparison. This can happen for inputs like
|
|
||||||
// "01" which will be treated like number 0 followed by
|
|
||||||
// number 1.
|
|
||||||
|
|
||||||
// conversion
|
|
||||||
char* endptr;
|
|
||||||
const auto float_val = std::strtod(reinterpret_cast<const char*>(m_begin), &endptr);
|
|
||||||
|
|
||||||
// check if strtod read beyond the end of the lexeme
|
|
||||||
if (reinterpret_cast<const lexer_char_t*>(endptr) != m_cursor)
|
|
||||||
{
|
|
||||||
throw std::invalid_argument(std::string("parse error - ") +
|
|
||||||
reinterpret_cast<const char*>(m_begin) + " is not a number");
|
|
||||||
}
|
|
||||||
|
|
||||||
// check if conversion loses precision
|
|
||||||
const auto int_val = static_cast<int>(float_val);
|
|
||||||
if (float_val == int_val)
|
|
||||||
{
|
|
||||||
// we would not lose precision -> return int
|
|
||||||
return basic_json(int_val);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// we would lose precision -> return float
|
|
||||||
return basic_json(float_val);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
std::string error_msg = "parse error - unexpected \'";
|
|
||||||
error_msg += static_cast<char>(m_begin[0]);
|
|
||||||
error_msg += "\' (";
|
|
||||||
error_msg += token_type_name(last_token) + ")";
|
|
||||||
throw std::invalid_argument(error_msg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
/*!
|
|
||||||
This function implements a scanner for JSON. It is specified using
|
|
||||||
regular expressions that try to follow RFC 7159 and ECMA-404 as close
|
|
||||||
as possible. These regular expressions are then translated into a
|
|
||||||
deterministic finite automaton (DFA) by the tool RE2C. As a result, the
|
|
||||||
translated code for this function consists of a large block of code
|
|
||||||
with goto jumps.
|
|
||||||
|
|
||||||
@return the class of the next token read from the buffer
|
|
||||||
|
|
||||||
@todo Unicode support needs to be checked.
|
|
||||||
*/
|
|
||||||
inline token_type get_token()
|
|
||||||
{
|
|
||||||
// needed by RE2C
|
|
||||||
const lexer_char_t* marker = nullptr;
|
|
||||||
|
|
||||||
// set up RE2C
|
|
||||||
/*!re2c
|
/*!re2c
|
||||||
re2c:labelprefix = "json_parser_";
|
|
||||||
re2c:yyfill:enable = 0;
|
|
||||||
re2c:define:YYCURSOR = m_cursor;
|
re2c:define:YYCURSOR = m_cursor;
|
||||||
re2c:define:YYCTYPE = lexer_char_t;
|
|
||||||
re2c:define:YYMARKER = marker;
|
|
||||||
re2c:indent:string = " ";
|
|
||||||
re2c:define:YYLIMIT = m_limit;
|
re2c:define:YYLIMIT = m_limit;
|
||||||
*/
|
re2c:define:YYCTYPE = char;
|
||||||
|
re2c:define:YYCTXMARKER = m_ctxmarker;
|
||||||
json_parser_lexer_start:
|
re2c:define:YYMARKER = m_marker;
|
||||||
// set current to the begin of the buffer
|
re2c:indent:top = 1;
|
||||||
m_begin = m_cursor;
|
re2c:yyfill:enable = 0;
|
||||||
|
|
||||||
if (m_begin == m_limit)
|
|
||||||
{
|
|
||||||
return last_token = token_type::end_of_input;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*!re2c
|
|
||||||
// whitespace
|
|
||||||
ws = [ \t\n\r]*;
|
|
||||||
ws { goto json_parser_lexer_start; }
|
|
||||||
|
|
||||||
// structural characters
|
// structural characters
|
||||||
"[" { return last_token = token_type::begin_array; }
|
"[" { return token_type::begin_array; }
|
||||||
"]" { return last_token = token_type::end_array; }
|
"]" { return token_type::end_array; }
|
||||||
"{" { return last_token = token_type::begin_object; }
|
"{" { return token_type::begin_object; }
|
||||||
"}" { return last_token = token_type::end_object; }
|
"}" { return token_type::end_object; }
|
||||||
"," { return last_token = token_type::value_separator; }
|
"," { return token_type::value_separator; }
|
||||||
":" { return last_token = token_type::name_separator; }
|
":" { return token_type::name_separator; }
|
||||||
|
|
||||||
// literal names
|
// literal names
|
||||||
"null" { return last_token = token_type::literal_null; }
|
"null" { return token_type::literal_null; }
|
||||||
"true" { return last_token = token_type::literal_true; }
|
"true" { return token_type::literal_true; }
|
||||||
"false" { return last_token = token_type::literal_false; }
|
"false" { return token_type::literal_false; }
|
||||||
|
|
||||||
// number
|
// number
|
||||||
decimal_point = [.];
|
decimal_point = [.];
|
||||||
|
@ -2651,7 +2454,7 @@ json_parser_lexer_start:
|
||||||
frac = decimal_point digit+;
|
frac = decimal_point digit+;
|
||||||
int = (zero|digit_1_9 digit*);
|
int = (zero|digit_1_9 digit*);
|
||||||
number = minus? int frac? exp?;
|
number = minus? int frac? exp?;
|
||||||
number { return last_token = token_type::value_number; }
|
number { return token_type::value_number; }
|
||||||
|
|
||||||
// string
|
// string
|
||||||
quotation_mark = [\"];
|
quotation_mark = [\"];
|
||||||
|
@ -2660,58 +2463,16 @@ json_parser_lexer_start:
|
||||||
escaped = escape ([\"\\/bfnrt] | [u][0-9a-fA-F]{4});
|
escaped = escape ([\"\\/bfnrt] | [u][0-9a-fA-F]{4});
|
||||||
char = unescaped | escaped;
|
char = unescaped | escaped;
|
||||||
string = quotation_mark char* quotation_mark;
|
string = quotation_mark char* quotation_mark;
|
||||||
string { return last_token = token_type::value_string; }
|
string { return token_type::value_string; }
|
||||||
|
|
||||||
// anything else is an error
|
// end of file
|
||||||
* { return last_token = token_type::parse_error; }
|
'\000' { return token_type::end_of_input; }
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static std::string token_type_name(token_type t)
|
inline std::string get_string_value() const
|
||||||
{
|
{
|
||||||
switch (t)
|
return std::string(m_start, static_cast<size_t>(m_cursor - m_start));
|
||||||
{
|
|
||||||
case (token_type::uninitialized):
|
|
||||||
return "<uninitialized>";
|
|
||||||
case (token_type::literal_true):
|
|
||||||
return "true literal";
|
|
||||||
case (token_type::literal_false):
|
|
||||||
return "false literal";
|
|
||||||
case (token_type::literal_null):
|
|
||||||
return "null literal";
|
|
||||||
case (token_type::value_string):
|
|
||||||
return "string literal";
|
|
||||||
case (token_type::value_number):
|
|
||||||
return "number literal";
|
|
||||||
case (token_type::begin_array):
|
|
||||||
return "[";
|
|
||||||
case (token_type::begin_object):
|
|
||||||
return "{";
|
|
||||||
case (token_type::end_array):
|
|
||||||
return "]";
|
|
||||||
case (token_type::end_object):
|
|
||||||
return "}";
|
|
||||||
case (token_type::name_separator):
|
|
||||||
return ":";
|
|
||||||
case (token_type::value_separator):
|
|
||||||
return ",";
|
|
||||||
case (token_type::parse_error):
|
|
||||||
return "<parse error>";
|
|
||||||
case (token_type::end_of_input):
|
|
||||||
return "<end of input>";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void expect_new(token_type t)
|
|
||||||
{
|
|
||||||
if (t != last_token)
|
|
||||||
{
|
|
||||||
std::string error_msg = "parse error - unexpected \'";
|
|
||||||
error_msg += static_cast<char>(m_begin[0]);
|
|
||||||
error_msg += "\' (" + token_type_name(last_token);
|
|
||||||
error_msg += "); expected " + token_type_name(t);
|
|
||||||
throw std::invalid_argument(error_msg);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
|
@ -2727,23 +2488,266 @@ json_parser_lexer_start:
|
||||||
*/
|
*/
|
||||||
inline std::string get_string() const
|
inline std::string get_string() const
|
||||||
{
|
{
|
||||||
return std::string(
|
return std::string(m_start + 1, static_cast<size_t>(m_cursor - m_start - 2));
|
||||||
reinterpret_cast<const char*>(m_begin + 1),
|
}
|
||||||
static_cast<std::size_t>(m_cursor - m_begin - 2)
|
|
||||||
);
|
inline number_float_t get_number() const
|
||||||
|
{
|
||||||
|
// The pointer m_start points to the beginning of the
|
||||||
|
// parsed number. We pass this pointer to std::strtod which
|
||||||
|
// sets endptr to the first character past the converted
|
||||||
|
// number. If this pointer is not the same as m_cursor,
|
||||||
|
// then either more or less characters have been used
|
||||||
|
// during the comparison. This can happen for inputs like
|
||||||
|
// "01" which will be treated like number 0 followed by
|
||||||
|
// number 1.
|
||||||
|
|
||||||
|
// conversion
|
||||||
|
char* endptr;
|
||||||
|
const auto float_val = std::strtod(reinterpret_cast<const char*>(m_start), &endptr);
|
||||||
|
|
||||||
|
// check if strtod read beyond the end of the lexeme
|
||||||
|
if (endptr != m_cursor)
|
||||||
|
{
|
||||||
|
std::cerr << get_string_value() << std::endl;
|
||||||
|
return NAN;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return float_val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const char* m_content = nullptr;
|
||||||
|
|
||||||
|
const char* m_start = nullptr;
|
||||||
|
const char* m_cursor = nullptr;
|
||||||
|
const char* m_limit = nullptr;
|
||||||
|
const char* m_marker = nullptr;
|
||||||
|
const char* m_ctxmarker = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
class parser
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
/// constructor for strings
|
||||||
|
inline parser(const std::string& s) : m_buffer(s), m_lexer(m_buffer.c_str())
|
||||||
|
{
|
||||||
|
// read first token
|
||||||
|
get_token();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// a parser reading from an input stream
|
||||||
|
inline parser(std::istream& _is)
|
||||||
|
{
|
||||||
|
while (_is)
|
||||||
|
{
|
||||||
|
std::string input_line;
|
||||||
|
std::getline(_is, input_line);
|
||||||
|
m_buffer += input_line;
|
||||||
|
}
|
||||||
|
|
||||||
|
// initialize lexer
|
||||||
|
m_lexer = lexer(m_buffer.c_str());
|
||||||
|
|
||||||
|
// read first token
|
||||||
|
get_token();
|
||||||
|
}
|
||||||
|
|
||||||
|
inline basic_json parse()
|
||||||
|
{
|
||||||
|
switch (last_token)
|
||||||
|
{
|
||||||
|
case (lexer::token_type::begin_object):
|
||||||
|
{
|
||||||
|
// explicitly set result to object to cope with {}
|
||||||
|
basic_json result(value_t::object);
|
||||||
|
|
||||||
|
// read next token
|
||||||
|
get_token();
|
||||||
|
|
||||||
|
// closing } -> we are done
|
||||||
|
if (last_token == lexer::token_type::end_object)
|
||||||
|
{
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// otherwise: parse key-value pairs
|
||||||
|
do
|
||||||
|
{
|
||||||
|
// store key
|
||||||
|
expect(lexer::token_type::value_string);
|
||||||
|
const auto key = m_lexer.get_string();
|
||||||
|
|
||||||
|
// parse separator (:)
|
||||||
|
get_token();
|
||||||
|
expect(lexer::token_type::name_separator);
|
||||||
|
|
||||||
|
// parse value
|
||||||
|
get_token();
|
||||||
|
result[key] = parse();
|
||||||
|
|
||||||
|
// read next character
|
||||||
|
get_token();
|
||||||
|
}
|
||||||
|
while (last_token == lexer::token_type::value_separator
|
||||||
|
and get_token() == last_token);
|
||||||
|
|
||||||
|
// closing }
|
||||||
|
expect(lexer::token_type::end_object);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
case (lexer::token_type::begin_array):
|
||||||
|
{
|
||||||
|
// explicitly set result to array to cope with []
|
||||||
|
basic_json result(value_t::array);
|
||||||
|
|
||||||
|
// read next token
|
||||||
|
get_token();
|
||||||
|
|
||||||
|
// closing ] -> we are done
|
||||||
|
if (last_token == lexer::token_type::end_array)
|
||||||
|
{
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// otherwise: parse values
|
||||||
|
do
|
||||||
|
{
|
||||||
|
// parse value
|
||||||
|
result.push_back(parse());
|
||||||
|
|
||||||
|
// read next character
|
||||||
|
get_token();
|
||||||
|
}
|
||||||
|
while (last_token == lexer::token_type::value_separator
|
||||||
|
and get_token() == last_token);
|
||||||
|
|
||||||
|
// closing ]
|
||||||
|
expect(lexer::token_type::end_array);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
case (lexer::token_type::literal_null):
|
||||||
|
{
|
||||||
|
return basic_json(nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
case (lexer::token_type::value_string):
|
||||||
|
{
|
||||||
|
return basic_json(m_lexer.get_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
case (lexer::token_type::literal_true):
|
||||||
|
{
|
||||||
|
return basic_json(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
case (lexer::token_type::literal_false):
|
||||||
|
{
|
||||||
|
return basic_json(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
case (lexer::token_type::value_number):
|
||||||
|
{
|
||||||
|
auto float_val = m_lexer.get_number();
|
||||||
|
|
||||||
|
if (std::isnan(float_val))
|
||||||
|
{
|
||||||
|
throw std::invalid_argument(std::string("parse error - ") +
|
||||||
|
m_lexer.get_string_value() + " is not a number");
|
||||||
|
}
|
||||||
|
|
||||||
|
// check if conversion loses precision
|
||||||
|
const auto int_val = static_cast<number_integer_t>(float_val);
|
||||||
|
if (float_val == int_val)
|
||||||
|
{
|
||||||
|
// we would not lose precision -> return int
|
||||||
|
return basic_json(int_val);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// we would lose precision -> return float
|
||||||
|
return basic_json(float_val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
std::string error_msg = "parse error - unexpected \'";
|
||||||
|
error_msg += m_lexer.get_string_value();
|
||||||
|
error_msg += "\' (";
|
||||||
|
error_msg += token_type_name(last_token) + ")";
|
||||||
|
throw std::invalid_argument(error_msg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
/// get next token from lexer
|
||||||
|
inline typename lexer::token_type get_token()
|
||||||
|
{
|
||||||
|
last_token = m_lexer.scan();
|
||||||
|
return last_token;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static std::string token_type_name(typename lexer::token_type t)
|
||||||
|
{
|
||||||
|
switch (t)
|
||||||
|
{
|
||||||
|
case (lexer::token_type::uninitialized):
|
||||||
|
return "<uninitialized>";
|
||||||
|
case (lexer::token_type::literal_true):
|
||||||
|
return "true literal";
|
||||||
|
case (lexer::token_type::literal_false):
|
||||||
|
return "false literal";
|
||||||
|
case (lexer::token_type::literal_null):
|
||||||
|
return "null literal";
|
||||||
|
case (lexer::token_type::value_string):
|
||||||
|
return "string literal";
|
||||||
|
case (lexer::token_type::value_number):
|
||||||
|
return "number literal";
|
||||||
|
case (lexer::token_type::begin_array):
|
||||||
|
return "[";
|
||||||
|
case (lexer::token_type::begin_object):
|
||||||
|
return "{";
|
||||||
|
case (lexer::token_type::end_array):
|
||||||
|
return "]";
|
||||||
|
case (lexer::token_type::end_object):
|
||||||
|
return "}";
|
||||||
|
case (lexer::token_type::name_separator):
|
||||||
|
return ":";
|
||||||
|
case (lexer::token_type::value_separator):
|
||||||
|
return ",";
|
||||||
|
case (lexer::token_type::parse_error):
|
||||||
|
return "<parse error>";
|
||||||
|
case (lexer::token_type::end_of_input):
|
||||||
|
return "<end of input>";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void expect(typename lexer::token_type t) const
|
||||||
|
{
|
||||||
|
if (t != last_token)
|
||||||
|
{
|
||||||
|
std::string error_msg = "parse error - unexpected \'";
|
||||||
|
error_msg += m_lexer.get_string_value();
|
||||||
|
error_msg += "\' (" + token_type_name(last_token);
|
||||||
|
error_msg += "); expected " + token_type_name(t);
|
||||||
|
throw std::invalid_argument(error_msg);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// the buffer
|
/// the buffer
|
||||||
std::string buffer;
|
std::string m_buffer;
|
||||||
/// a pointer to the next character to read from the buffer
|
|
||||||
const lexer_char_t* m_cursor = nullptr;
|
|
||||||
/// a pointer past the last character of the buffer
|
|
||||||
const lexer_char_t* m_limit = nullptr;
|
|
||||||
/// a pointer to the beginning of the current token
|
|
||||||
const lexer_char_t* m_begin = nullptr;
|
|
||||||
/// the type of the last read token
|
/// the type of the last read token
|
||||||
token_type last_token = token_type::uninitialized;
|
typename lexer::token_type last_token = lexer::token_type::uninitialized;
|
||||||
|
lexer m_lexer;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
110
test/unit.cpp
110
test/unit.cpp
|
@ -3892,27 +3892,43 @@ TEST_CASE("deserialization")
|
||||||
{
|
{
|
||||||
SECTION("string")
|
SECTION("string")
|
||||||
{
|
{
|
||||||
auto s = "[\"foo\",1,2,3,false,{\"one\":1}]";
|
// auto s = "[\"foo\",1,2,3,false,{\"one\":1}]";
|
||||||
|
// json j = json::parse(s);
|
||||||
|
// CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
|
||||||
|
|
||||||
|
auto s = "null";
|
||||||
json j = json::parse(s);
|
json j = json::parse(s);
|
||||||
CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
|
CHECK(j == json());
|
||||||
}
|
}
|
||||||
|
|
||||||
SECTION("operator<<")
|
SECTION("operator<<")
|
||||||
{
|
{
|
||||||
|
// std::stringstream ss;
|
||||||
|
// ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
|
||||||
|
// json j;
|
||||||
|
// j << ss;
|
||||||
|
// CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
|
||||||
|
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
|
ss << "null";
|
||||||
json j;
|
json j;
|
||||||
j << ss;
|
j << ss;
|
||||||
CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
|
CHECK(j == json());
|
||||||
}
|
}
|
||||||
|
|
||||||
SECTION("operator>>")
|
SECTION("operator>>")
|
||||||
{
|
{
|
||||||
|
// std::stringstream ss;
|
||||||
|
// ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
|
||||||
|
// json j;
|
||||||
|
// ss >> j;
|
||||||
|
// CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
|
||||||
|
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
|
ss << "null";
|
||||||
json j;
|
json j;
|
||||||
ss >> j;
|
ss >> j;
|
||||||
CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
|
CHECK(j == json());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3980,42 +3996,42 @@ TEST_CASE("parser class")
|
||||||
{
|
{
|
||||||
SECTION("structural characters")
|
SECTION("structural characters")
|
||||||
{
|
{
|
||||||
CHECK(json::parser("[").last_token == json::parser::token_type::begin_array);
|
CHECK(json::parser("[").last_token == json::lexer::token_type::begin_array);
|
||||||
CHECK(json::parser("]").last_token == json::parser::token_type::end_array);
|
CHECK(json::parser("]").last_token == json::lexer::token_type::end_array);
|
||||||
CHECK(json::parser("{").last_token == json::parser::token_type::begin_object);
|
CHECK(json::parser("{").last_token == json::lexer::token_type::begin_object);
|
||||||
CHECK(json::parser("}").last_token == json::parser::token_type::end_object);
|
CHECK(json::parser("}").last_token == json::lexer::token_type::end_object);
|
||||||
CHECK(json::parser(",").last_token == json::parser::token_type::value_separator);
|
CHECK(json::parser(",").last_token == json::lexer::token_type::value_separator);
|
||||||
CHECK(json::parser(":").last_token == json::parser::token_type::name_separator);
|
CHECK(json::parser(":").last_token == json::lexer::token_type::name_separator);
|
||||||
}
|
}
|
||||||
|
|
||||||
SECTION("literal names")
|
SECTION("literal names")
|
||||||
{
|
{
|
||||||
CHECK(json::parser("null").last_token == json::parser::token_type::literal_null);
|
CHECK(json::parser("null").last_token == json::lexer::token_type::literal_null);
|
||||||
CHECK(json::parser("true").last_token == json::parser::token_type::literal_true);
|
CHECK(json::parser("true").last_token == json::lexer::token_type::literal_true);
|
||||||
CHECK(json::parser("false").last_token == json::parser::token_type::literal_false);
|
CHECK(json::parser("false").last_token == json::lexer::token_type::literal_false);
|
||||||
}
|
}
|
||||||
|
|
||||||
SECTION("numbers")
|
SECTION("numbers")
|
||||||
{
|
{
|
||||||
CHECK(json::parser("0").last_token == json::parser::token_type::value_number);
|
CHECK(json::parser("0").last_token == json::lexer::token_type::value_number);
|
||||||
CHECK(json::parser("1").last_token == json::parser::token_type::value_number);
|
CHECK(json::parser("1").last_token == json::lexer::token_type::value_number);
|
||||||
CHECK(json::parser("2").last_token == json::parser::token_type::value_number);
|
CHECK(json::parser("2").last_token == json::lexer::token_type::value_number);
|
||||||
CHECK(json::parser("3").last_token == json::parser::token_type::value_number);
|
CHECK(json::parser("3").last_token == json::lexer::token_type::value_number);
|
||||||
CHECK(json::parser("4").last_token == json::parser::token_type::value_number);
|
CHECK(json::parser("4").last_token == json::lexer::token_type::value_number);
|
||||||
CHECK(json::parser("5").last_token == json::parser::token_type::value_number);
|
CHECK(json::parser("5").last_token == json::lexer::token_type::value_number);
|
||||||
CHECK(json::parser("6").last_token == json::parser::token_type::value_number);
|
CHECK(json::parser("6").last_token == json::lexer::token_type::value_number);
|
||||||
CHECK(json::parser("7").last_token == json::parser::token_type::value_number);
|
CHECK(json::parser("7").last_token == json::lexer::token_type::value_number);
|
||||||
CHECK(json::parser("8").last_token == json::parser::token_type::value_number);
|
CHECK(json::parser("8").last_token == json::lexer::token_type::value_number);
|
||||||
CHECK(json::parser("9").last_token == json::parser::token_type::value_number);
|
CHECK(json::parser("9").last_token == json::lexer::token_type::value_number);
|
||||||
}
|
}
|
||||||
|
|
||||||
SECTION("whitespace")
|
SECTION("whitespace")
|
||||||
{
|
{
|
||||||
CHECK(json::parser(" 0").last_token == json::parser::token_type::value_number);
|
CHECK(json::parser(" 0").last_token == json::lexer::token_type::value_number);
|
||||||
CHECK(json::parser("\t0").last_token == json::parser::token_type::value_number);
|
CHECK(json::parser("\t0").last_token == json::lexer::token_type::value_number);
|
||||||
CHECK(json::parser("\n0").last_token == json::parser::token_type::value_number);
|
CHECK(json::parser("\n0").last_token == json::lexer::token_type::value_number);
|
||||||
CHECK(json::parser("\r0").last_token == json::parser::token_type::value_number);
|
CHECK(json::parser("\r0").last_token == json::lexer::token_type::value_number);
|
||||||
CHECK(json::parser(" \t\n\r\n\t 0").last_token == json::parser::token_type::value_number);
|
CHECK(json::parser(" \t\n\r\n\t 0").last_token == json::lexer::token_type::value_number);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -4049,7 +4065,7 @@ TEST_CASE("parser class")
|
||||||
case ('9'):
|
case ('9'):
|
||||||
case ('"'):
|
case ('"'):
|
||||||
{
|
{
|
||||||
CHECK(json::parser(s).last_token != json::parser::token_type::parse_error);
|
CHECK(json::parser(s).last_token != json::lexer::token_type::parse_error);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4058,13 +4074,13 @@ TEST_CASE("parser class")
|
||||||
case ('\n'):
|
case ('\n'):
|
||||||
case ('\r'):
|
case ('\r'):
|
||||||
{
|
{
|
||||||
CHECK(json::parser(s).last_token == json::parser::token_type::end_of_input);
|
CHECK(json::parser(s).last_token == json::lexer::token_type::end_of_input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
CHECK(json::parser(s).last_token == json::parser::token_type::parse_error);
|
CHECK(json::parser(s).last_token == json::lexer::token_type::parse_error);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4093,19 +4109,19 @@ TEST_CASE("parser class")
|
||||||
|
|
||||||
SECTION("token_type_name")
|
SECTION("token_type_name")
|
||||||
{
|
{
|
||||||
CHECK(json::parser::token_type_name(json::parser::token_type::uninitialized) == "<uninitialized>");
|
CHECK(json::parser::token_type_name(json::lexer::token_type::uninitialized) == "<uninitialized>");
|
||||||
CHECK(json::parser::token_type_name(json::parser::token_type::literal_true) == "true literal");
|
CHECK(json::parser::token_type_name(json::lexer::token_type::literal_true) == "true literal");
|
||||||
CHECK(json::parser::token_type_name(json::parser::token_type::literal_false) == "false literal");
|
CHECK(json::parser::token_type_name(json::lexer::token_type::literal_false) == "false literal");
|
||||||
CHECK(json::parser::token_type_name(json::parser::token_type::literal_null) == "null literal");
|
CHECK(json::parser::token_type_name(json::lexer::token_type::literal_null) == "null literal");
|
||||||
CHECK(json::parser::token_type_name(json::parser::token_type::value_string) == "string literal");
|
CHECK(json::parser::token_type_name(json::lexer::token_type::value_string) == "string literal");
|
||||||
CHECK(json::parser::token_type_name(json::parser::token_type::value_number) == "number literal");
|
CHECK(json::parser::token_type_name(json::lexer::token_type::value_number) == "number literal");
|
||||||
CHECK(json::parser::token_type_name(json::parser::token_type::begin_array) == "[");
|
CHECK(json::parser::token_type_name(json::lexer::token_type::begin_array) == "[");
|
||||||
CHECK(json::parser::token_type_name(json::parser::token_type::begin_object) == "{");
|
CHECK(json::parser::token_type_name(json::lexer::token_type::begin_object) == "{");
|
||||||
CHECK(json::parser::token_type_name(json::parser::token_type::end_array) == "]");
|
CHECK(json::parser::token_type_name(json::lexer::token_type::end_array) == "]");
|
||||||
CHECK(json::parser::token_type_name(json::parser::token_type::end_object) == "}");
|
CHECK(json::parser::token_type_name(json::lexer::token_type::end_object) == "}");
|
||||||
CHECK(json::parser::token_type_name(json::parser::token_type::name_separator) == ":");
|
CHECK(json::parser::token_type_name(json::lexer::token_type::name_separator) == ":");
|
||||||
CHECK(json::parser::token_type_name(json::parser::token_type::value_separator) == ",");
|
CHECK(json::parser::token_type_name(json::lexer::token_type::value_separator) == ",");
|
||||||
CHECK(json::parser::token_type_name(json::parser::token_type::parse_error) == "<parse error>");
|
CHECK(json::parser::token_type_name(json::lexer::token_type::parse_error) == "<parse error>");
|
||||||
CHECK(json::parser::token_type_name(json::parser::token_type::end_of_input) == "<end of input>");
|
CHECK(json::parser::token_type_name(json::lexer::token_type::end_of_input) == "<end of input>");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue