🔨 using the SAX-DOM parser

This commit is contained in:
Niels Lohmann 2018-03-06 18:17:07 +01:00
parent faf2546a15
commit 5beab80553
No known key found for this signature in database
GPG key ID: 7F3CEA63AE251B69
6 changed files with 173 additions and 166 deletions

View file

@ -37,35 +37,6 @@ BENCHMARK_CAPTURE(ParseFile, signed_ints, "data/numbers/signed_ints.json");
BENCHMARK_CAPTURE(ParseFile, unsigned_ints, "data/numbers/unsigned_ints.json"); BENCHMARK_CAPTURE(ParseFile, unsigned_ints, "data/numbers/unsigned_ints.json");
/*!
@brief benchmark SAX parsing of a JSON file into a DOM via json_sax_dom_parser

Only the call to json::sax_parse is timed. Opening the file, constructing the
SAX-DOM parser, and destroying both happen while the timer is paused.

The stream and the parser are automatic objects in a nested scope rather than
raw new/delete pairs, so they are released even if json::sax_parse throws
(the SAX-DOM parser reports errors by throwing). The scope closes while timing
is paused, so destruction cost stays out of the measurement.

@param[in,out] state     benchmark state driving the iteration loop
@param[in]     filename  path of the JSON file to parse
*/
static void ParseFileSax(benchmark::State& state, const char* filename)
{
    while (state.KeepRunning())
    {
        state.PauseTiming();
        {
            std::ifstream f(filename);
            nlohmann::json_sax_dom_parser<json> sdp;
            state.ResumeTiming();

            json::sax_parse(f, &sdp);

            state.PauseTiming();
            // f and sdp are destroyed here, while the timer is still paused
        }
        state.ResumeTiming();
    }

    // open the file positioned at its end so that tellg() yields the file size
    std::ifstream file(filename, std::ios::binary | std::ios::ate);
    state.SetBytesProcessed(state.iterations() * file.tellg());
}
// register ParseFileSax once per sample input file; the second macro argument
// names the benchmark case and the third is passed as the filename parameter
BENCHMARK_CAPTURE(ParseFileSax, jeopardy, "data/jeopardy/jeopardy.json");
BENCHMARK_CAPTURE(ParseFileSax, canada, "data/nativejson-benchmark/canada.json");
BENCHMARK_CAPTURE(ParseFileSax, citm_catalog, "data/nativejson-benchmark/citm_catalog.json");
BENCHMARK_CAPTURE(ParseFileSax, twitter, "data/nativejson-benchmark/twitter.json");
BENCHMARK_CAPTURE(ParseFileSax, floats, "data/numbers/floats.json");
BENCHMARK_CAPTURE(ParseFileSax, signed_ints, "data/numbers/signed_ints.json");
BENCHMARK_CAPTURE(ParseFileSax, unsigned_ints, "data/numbers/unsigned_ints.json");
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// parse JSON from string // parse JSON from string
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
@ -98,33 +69,6 @@ BENCHMARK_CAPTURE(ParseString, floats, "data/numbers/floats.json");
BENCHMARK_CAPTURE(ParseString, signed_ints, "data/numbers/signed_ints.json"); BENCHMARK_CAPTURE(ParseString, signed_ints, "data/numbers/signed_ints.json");
BENCHMARK_CAPTURE(ParseString, unsigned_ints, "data/numbers/unsigned_ints.json"); BENCHMARK_CAPTURE(ParseString, unsigned_ints, "data/numbers/unsigned_ints.json");
/*!
@brief benchmark SAX parsing of an in-memory JSON string into a DOM

The file is read into a std::string once, before the iteration loop. Only the
call to json::sax_parse is timed. Constructing and destroying the SAX-DOM
parser happen while the timer is paused.

The parser is an automatic object in a nested scope rather than a raw
new/delete pair, so it is released even if json::sax_parse throws. The scope
closes while timing is paused, so destruction cost stays out of the
measurement.

@param[in,out] state     benchmark state driving the iteration loop
@param[in]     filename  path of the JSON file whose content is parsed
*/
static void ParseStringSax(benchmark::State& state, const char* filename)
{
    // slurp the whole file; the extra parentheses avoid the most vexing parse
    std::ifstream f(filename);
    std::string str((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());

    while (state.KeepRunning())
    {
        state.PauseTiming();
        {
            nlohmann::json_sax_dom_parser<json> sdp;
            state.ResumeTiming();

            json::sax_parse(str, &sdp);

            state.PauseTiming();
            // sdp is destroyed here, while the timer is still paused
        }
        state.ResumeTiming();
    }

    state.SetBytesProcessed(state.iterations() * str.size());
}
// register ParseStringSax once per sample input file; the second macro argument
// names the benchmark case and the third is passed as the filename parameter
BENCHMARK_CAPTURE(ParseStringSax, jeopardy, "data/jeopardy/jeopardy.json");
BENCHMARK_CAPTURE(ParseStringSax, canada, "data/nativejson-benchmark/canada.json");
BENCHMARK_CAPTURE(ParseStringSax, citm_catalog, "data/nativejson-benchmark/citm_catalog.json");
BENCHMARK_CAPTURE(ParseStringSax, twitter, "data/nativejson-benchmark/twitter.json");
BENCHMARK_CAPTURE(ParseStringSax, floats, "data/numbers/floats.json");
BENCHMARK_CAPTURE(ParseStringSax, signed_ints, "data/numbers/signed_ints.json");
BENCHMARK_CAPTURE(ParseStringSax, unsigned_ints, "data/numbers/unsigned_ints.json");
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// serialize JSON // serialize JSON

View file

@ -132,6 +132,10 @@ class json_sax_dom_parser : public json_sax<BasicJsonType>
using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
using number_float_t = typename BasicJsonType::number_float_t; using number_float_t = typename BasicJsonType::number_float_t;
/*!
@brief create a SAX receiver that writes the parsed value into @a r

@param[in,out] r  JSON value used as the parse root; it is held by reference,
                  so it must outlive this parser object
@param[in] allow_exceptions_  stored as the flag that controls whether
                  parse_error may throw
*/
json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true)
: root(r), allow_exceptions(allow_exceptions_)
{}
bool null() override bool null() override
{ {
handle_value(nullptr); handle_value(nullptr);
@ -204,25 +208,30 @@ class json_sax_dom_parser : public json_sax<BasicJsonType>
return true; return true;
} }
bool parse_error(std::size_t position, const std::string&, const std::string& error_msg) override bool parse_error(std::size_t position, const std::string& token,
const std::string& error_msg) override
{ {
JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg)); errored = true;
if (allow_exceptions)
{
if (error_msg == "number overflow")
{
JSON_THROW(BasicJsonType::out_of_range::create(406, "number overflow parsing '" + token + "'"));
}
else
{
JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg));
}
}
return false; return false;
} }
BasicJsonType& get_value() bool is_errored() const
{ {
return root; return errored;
} }
private: private:
/// the parsed JSON value
BasicJsonType root;
/// stack to model hierarchy of values
std::vector<BasicJsonType*> ref_stack;
/// helper to hold the reference for the next object element
BasicJsonType* object_element = nullptr;
/*! /*!
@invariant If the ref stack is empty, then the passed value will be the new @invariant If the ref stack is empty, then the passed value will be the new
root. root.
@ -234,7 +243,6 @@ class json_sax_dom_parser : public json_sax<BasicJsonType>
{ {
if (ref_stack.empty()) if (ref_stack.empty())
{ {
assert(root.is_null());
root = BasicJsonType(std::forward<Value>(v)); root = BasicJsonType(std::forward<Value>(v));
return &root; return &root;
} }
@ -254,6 +262,17 @@ class json_sax_dom_parser : public json_sax<BasicJsonType>
} }
} }
} }
/// the parsed JSON value
BasicJsonType& root;
/// stack to model hierarchy of values
std::vector<BasicJsonType*> ref_stack;
/// helper to hold the reference for the next object element
BasicJsonType* object_element = nullptr;
/// whether a syntax error occurred
bool errored = false;
/// whether to throw exceptions in case of errors
const bool allow_exceptions = true;
}; };
} }

View file

@ -63,11 +63,10 @@ class parser
const parser_callback_t cb = nullptr, const parser_callback_t cb = nullptr,
const bool allow_exceptions_ = true) const bool allow_exceptions_ = true)
: callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_) : callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_)
{} {
// read first token
parser(detail::input_adapter_t adapter, json_sax_t* s) get_token();
: m_lexer(adapter), sax(s) }
{}
/*! /*!
@brief public parser interface @brief public parser interface
@ -81,31 +80,52 @@ class parser
*/ */
void parse(const bool strict, BasicJsonType& result) void parse(const bool strict, BasicJsonType& result)
{ {
// read first token if (callback)
get_token();
parse_internal(true, result);
result.assert_invariant();
// in strict mode, input must be completely read
if (strict)
{ {
get_token(); parse_internal(true, result);
expect(token_type::end_of_input); result.assert_invariant();
// in strict mode, input must be completely read
if (strict)
{
get_token();
expect(token_type::end_of_input);
}
// in case of an error, return discarded value
if (errored)
{
result = value_t::discarded;
return;
}
// set top-level value to null if it was discarded by the callback
// function
if (result.is_discarded())
{
result = nullptr;
}
} }
else
// in case of an error, return discarded value
if (errored)
{ {
result = value_t::discarded; json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
return; sax_parse_internal(&sdp);
} result.assert_invariant();
// set top-level value to null if it was discarded by the callback // in strict mode, input must be completely read
// function if (strict and (get_token() != token_type::end_of_input))
if (result.is_discarded()) {
{ sdp.parse_error(m_lexer.get_position(),
result = nullptr; m_lexer.get_token_string(),
exception_message(token_type::end_of_input));
}
// in case of an error, return discarded value
if (sdp.is_errored())
{
result = value_t::discarded;
return;
}
} }
} }
@ -117,9 +137,6 @@ class parser
*/ */
bool accept(const bool strict = true) bool accept(const bool strict = true)
{ {
// read first token
get_token();
if (not accept_internal()) if (not accept_internal())
{ {
return false; return false;
@ -129,12 +146,9 @@ class parser
return not strict or (get_token() == token_type::end_of_input); return not strict or (get_token() == token_type::end_of_input);
} }
bool sax_parse() bool sax_parse(json_sax_t* sax)
{ {
// read first token return sax_parse_internal(sax);
get_token();
return sax_parse_internal();
} }
private: private:
@ -535,7 +549,7 @@ class parser
} }
} }
bool sax_parse_internal() bool sax_parse_internal(json_sax_t* sax)
{ {
switch (last_token) switch (last_token)
{ {
@ -584,7 +598,7 @@ class parser
// parse value // parse value
get_token(); get_token();
if (not sax_parse_internal()) if (not sax_parse_internal(sax))
{ {
return false; return false;
} }
@ -631,7 +645,7 @@ class parser
while (true) while (true)
{ {
// parse value // parse value
if (not sax_parse_internal()) if (not sax_parse_internal(sax))
{ {
return false; return false;
} }
@ -782,8 +796,6 @@ class parser
bool errored = false; bool errored = false;
/// whether to throw exceptions in case of errors /// whether to throw exceptions in case of errors
const bool allow_exceptions = true; const bool allow_exceptions = true;
/// associated SAX parse event receiver
json_sax_t* sax = nullptr;
}; };
} }
} }

View file

@ -5930,12 +5930,12 @@ class basic_json
static bool sax_parse(detail::input_adapter i, json_sax_t* sax) static bool sax_parse(detail::input_adapter i, json_sax_t* sax)
{ {
return parser(i, sax).sax_parse(); return parser(i).sax_parse(sax);
} }
static bool sax_parse(detail::input_adapter& i, json_sax_t* sax) static bool sax_parse(detail::input_adapter& i, json_sax_t* sax)
{ {
return parser(i, sax).sax_parse(); return parser(i).sax_parse(sax);
} }
/*! /*!
@ -6013,7 +6013,7 @@ class basic_json
typename std::iterator_traits<IteratorType>::iterator_category>::value, int>::type = 0> typename std::iterator_traits<IteratorType>::iterator_category>::value, int>::type = 0>
static bool sax_parse(IteratorType first, IteratorType last, json_sax_t* sax) static bool sax_parse(IteratorType first, IteratorType last, json_sax_t* sax)
{ {
return parser(detail::input_adapter(first, last), sax).sax_parse(); return parser(detail::input_adapter(first, last)).sax_parse(sax);
} }
/*! /*!

View file

@ -3266,6 +3266,10 @@ class json_sax_dom_parser : public json_sax<BasicJsonType>
using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
using number_float_t = typename BasicJsonType::number_float_t; using number_float_t = typename BasicJsonType::number_float_t;
/*!
@brief create a SAX receiver that writes the parsed value into @a r

@param[in,out] r  JSON value used as the parse root; it is held by reference,
                  so it must outlive this parser object
@param[in] allow_exceptions_  stored as the flag that controls whether
                  parse_error may throw
*/
json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true)
: root(r), allow_exceptions(allow_exceptions_)
{}
bool null() override bool null() override
{ {
handle_value(nullptr); handle_value(nullptr);
@ -3338,25 +3342,30 @@ class json_sax_dom_parser : public json_sax<BasicJsonType>
return true; return true;
} }
bool parse_error(std::size_t position, const std::string&, const std::string& error_msg) override bool parse_error(std::size_t position, const std::string& token,
const std::string& error_msg) override
{ {
JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg)); errored = true;
if (allow_exceptions)
{
if (error_msg == "number overflow")
{
JSON_THROW(BasicJsonType::out_of_range::create(406, "number overflow parsing '" + token + "'"));
}
else
{
JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg));
}
}
return false; return false;
} }
BasicJsonType& get_value() bool is_errored() const
{ {
return root; return errored;
} }
private: private:
/// the parsed JSON value
BasicJsonType root;
/// stack to model hierarchy of values
std::vector<BasicJsonType*> ref_stack;
/// helper to hold the reference for the next object element
BasicJsonType* object_element = nullptr;
/*! /*!
@invariant If the ref stack is empty, then the passed value will be the new @invariant If the ref stack is empty, then the passed value will be the new
root. root.
@ -3368,7 +3377,6 @@ class json_sax_dom_parser : public json_sax<BasicJsonType>
{ {
if (ref_stack.empty()) if (ref_stack.empty())
{ {
assert(root.is_null());
root = BasicJsonType(std::forward<Value>(v)); root = BasicJsonType(std::forward<Value>(v));
return &root; return &root;
} }
@ -3388,6 +3396,17 @@ class json_sax_dom_parser : public json_sax<BasicJsonType>
} }
} }
} }
/// the parsed JSON value
BasicJsonType& root;
/// stack to model hierarchy of values
std::vector<BasicJsonType*> ref_stack;
/// helper to hold the reference for the next object element
BasicJsonType* object_element = nullptr;
/// whether a syntax error occurred
bool errored = false;
/// whether to throw exceptions in case of errors
const bool allow_exceptions = true;
}; };
} }
@ -3447,11 +3466,10 @@ class parser
const parser_callback_t cb = nullptr, const parser_callback_t cb = nullptr,
const bool allow_exceptions_ = true) const bool allow_exceptions_ = true)
: callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_) : callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_)
{} {
// read first token
parser(detail::input_adapter_t adapter, json_sax_t* s) get_token();
: m_lexer(adapter), sax(s) }
{}
/*! /*!
@brief public parser interface @brief public parser interface
@ -3465,31 +3483,52 @@ class parser
*/ */
void parse(const bool strict, BasicJsonType& result) void parse(const bool strict, BasicJsonType& result)
{ {
// read first token if (callback)
get_token();
parse_internal(true, result);
result.assert_invariant();
// in strict mode, input must be completely read
if (strict)
{ {
get_token(); parse_internal(true, result);
expect(token_type::end_of_input); result.assert_invariant();
// in strict mode, input must be completely read
if (strict)
{
get_token();
expect(token_type::end_of_input);
}
// in case of an error, return discarded value
if (errored)
{
result = value_t::discarded;
return;
}
// set top-level value to null if it was discarded by the callback
// function
if (result.is_discarded())
{
result = nullptr;
}
} }
else
// in case of an error, return discarded value
if (errored)
{ {
result = value_t::discarded; json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
return; sax_parse_internal(&sdp);
} result.assert_invariant();
// set top-level value to null if it was discarded by the callback // in strict mode, input must be completely read
// function if (strict and (get_token() != token_type::end_of_input))
if (result.is_discarded()) {
{ sdp.parse_error(m_lexer.get_position(),
result = nullptr; m_lexer.get_token_string(),
exception_message(token_type::end_of_input));
}
// in case of an error, return discarded value
if (sdp.is_errored())
{
result = value_t::discarded;
return;
}
} }
} }
@ -3501,9 +3540,6 @@ class parser
*/ */
bool accept(const bool strict = true) bool accept(const bool strict = true)
{ {
// read first token
get_token();
if (not accept_internal()) if (not accept_internal())
{ {
return false; return false;
@ -3513,12 +3549,9 @@ class parser
return not strict or (get_token() == token_type::end_of_input); return not strict or (get_token() == token_type::end_of_input);
} }
bool sax_parse() bool sax_parse(json_sax_t* sax)
{ {
// read first token return sax_parse_internal(sax);
get_token();
return sax_parse_internal();
} }
private: private:
@ -3919,7 +3952,7 @@ class parser
} }
} }
bool sax_parse_internal() bool sax_parse_internal(json_sax_t* sax)
{ {
switch (last_token) switch (last_token)
{ {
@ -3968,7 +4001,7 @@ class parser
// parse value // parse value
get_token(); get_token();
if (not sax_parse_internal()) if (not sax_parse_internal(sax))
{ {
return false; return false;
} }
@ -4015,7 +4048,7 @@ class parser
while (true) while (true)
{ {
// parse value // parse value
if (not sax_parse_internal()) if (not sax_parse_internal(sax))
{ {
return false; return false;
} }
@ -4166,8 +4199,6 @@ class parser
bool errored = false; bool errored = false;
/// whether to throw exceptions in case of errors /// whether to throw exceptions in case of errors
const bool allow_exceptions = true; const bool allow_exceptions = true;
/// associated SAX parse event receiver
json_sax_t* sax = nullptr;
}; };
} }
} }
@ -15989,12 +16020,12 @@ class basic_json
static bool sax_parse(detail::input_adapter i, json_sax_t* sax) static bool sax_parse(detail::input_adapter i, json_sax_t* sax)
{ {
return parser(i, sax).sax_parse(); return parser(i).sax_parse(sax);
} }
static bool sax_parse(detail::input_adapter& i, json_sax_t* sax) static bool sax_parse(detail::input_adapter& i, json_sax_t* sax)
{ {
return parser(i, sax).sax_parse(); return parser(i).sax_parse(sax);
} }
/*! /*!
@ -16072,7 +16103,7 @@ class basic_json
typename std::iterator_traits<IteratorType>::iterator_category>::value, int>::type = 0> typename std::iterator_traits<IteratorType>::iterator_category>::value, int>::type = 0>
static bool sax_parse(IteratorType first, IteratorType last, json_sax_t* sax) static bool sax_parse(IteratorType first, IteratorType last, json_sax_t* sax)
{ {
return parser(detail::input_adapter(first, last), sax).sax_parse(); return parser(detail::input_adapter(first, last)).sax_parse(sax);
} }
/*! /*!

View file

@ -148,9 +148,10 @@ json parser_helper(const std::string& s)
CHECK_NOTHROW(json::parser(nlohmann::detail::input_adapter(s), nullptr, false).parse(true, j_nothrow)); CHECK_NOTHROW(json::parser(nlohmann::detail::input_adapter(s), nullptr, false).parse(true, j_nothrow));
CHECK(j_nothrow == j); CHECK(j_nothrow == j);
nlohmann::json_sax_dom_parser<json> sdp; json j_sax;
nlohmann::json_sax_dom_parser<json> sdp(j_sax);
json::sax_parse(s, &sdp); json::sax_parse(s, &sdp);
CHECK(sdp.get_value() == j); CHECK(j_sax == j);
return j; return j;
} }