🔨 added SAX-DOM-Parser

This commit is contained in:
Niels Lohmann 2018-03-05 21:06:00 +01:00
parent 9d27429527
commit 5b9d03cfdb
No known key found for this signature in database
GPG key ID: 7F3CEA63AE251B69
5 changed files with 325 additions and 124 deletions

View file

@ -37,6 +37,35 @@ BENCHMARK_CAPTURE(ParseFile, signed_ints, "data/numbers/signed_ints.json");
BENCHMARK_CAPTURE(ParseFile, unsigned_ints, "data/numbers/unsigned_ints.json");
// Benchmark: SAX-parse a JSON file into a DOM value.
//
// Only the call to json::sax_parse is timed; the input stream and the
// json_sax_dom_parser are created and destroyed while the timer is paused.
//
// state:    benchmark state driving the measurement loop
// filename: path of the JSON file to parse
static void ParseFileSax(benchmark::State& state, const char* filename)
{
    while (state.KeepRunning())
    {
        state.PauseTiming();
        {
            // Automatic storage instead of new/delete: both objects are
            // released even if sax_parse throws, and their destructors run
            // at the closing brace of this scope, i.e. after the timer has
            // been paused again.
            std::ifstream f(filename);
            nlohmann::json_sax_dom_parser<json> sdp;
            state.ResumeTiming();

            json::sax_parse(f, &sdp);

            state.PauseTiming();
        }
        state.ResumeTiming();
    }

    // opened with std::ios::ate so that tellg() reports the file size
    std::ifstream file(filename, std::ios::binary | std::ios::ate);
    state.SetBytesProcessed(state.iterations() * file.tellg());
}
// register ParseFileSax once per input file; the second macro argument names
// the benchmark case, the third is passed to ParseFileSax as `filename`
BENCHMARK_CAPTURE(ParseFileSax, jeopardy, "data/jeopardy/jeopardy.json");
BENCHMARK_CAPTURE(ParseFileSax, canada, "data/nativejson-benchmark/canada.json");
BENCHMARK_CAPTURE(ParseFileSax, citm_catalog, "data/nativejson-benchmark/citm_catalog.json");
BENCHMARK_CAPTURE(ParseFileSax, twitter, "data/nativejson-benchmark/twitter.json");
BENCHMARK_CAPTURE(ParseFileSax, floats, "data/numbers/floats.json");
BENCHMARK_CAPTURE(ParseFileSax, signed_ints, "data/numbers/signed_ints.json");
BENCHMARK_CAPTURE(ParseFileSax, unsigned_ints, "data/numbers/unsigned_ints.json");
//////////////////////////////////////////////////////////////////////////////
// parse JSON from string
//////////////////////////////////////////////////////////////////////////////
@ -69,6 +98,33 @@ BENCHMARK_CAPTURE(ParseString, floats, "data/numbers/floats.json");
BENCHMARK_CAPTURE(ParseString, signed_ints, "data/numbers/signed_ints.json");
BENCHMARK_CAPTURE(ParseString, unsigned_ints, "data/numbers/unsigned_ints.json");
// Benchmark: SAX-parse an in-memory JSON string into a DOM value.
//
// The file is read into a std::string once, before the measurement loop.
// Inside the loop only the call to json::sax_parse is timed; the
// json_sax_dom_parser is created and destroyed while the timer is paused.
//
// state:    benchmark state driving the measurement loop
// filename: path of the JSON file whose content is parsed
static void ParseStringSax(benchmark::State& state, const char* filename)
{
    std::ifstream f(filename);
    std::string str((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());

    while (state.KeepRunning())
    {
        state.PauseTiming();
        {
            // Automatic storage instead of new/delete: the handler is
            // released even if sax_parse throws, and its destructor runs at
            // the closing brace of this scope, i.e. after the timer has been
            // paused again.
            nlohmann::json_sax_dom_parser<json> sdp;
            state.ResumeTiming();

            json::sax_parse(str, &sdp);

            state.PauseTiming();
        }
        state.ResumeTiming();
    }

    state.SetBytesProcessed(state.iterations() * str.size());
}
// register ParseStringSax once per input file; the second macro argument
// names the benchmark case, the third is passed to ParseStringSax as `filename`
BENCHMARK_CAPTURE(ParseStringSax, jeopardy, "data/jeopardy/jeopardy.json");
BENCHMARK_CAPTURE(ParseStringSax, canada, "data/nativejson-benchmark/canada.json");
BENCHMARK_CAPTURE(ParseStringSax, citm_catalog, "data/nativejson-benchmark/citm_catalog.json");
BENCHMARK_CAPTURE(ParseStringSax, twitter, "data/nativejson-benchmark/twitter.json");
BENCHMARK_CAPTURE(ParseStringSax, floats, "data/numbers/floats.json");
BENCHMARK_CAPTURE(ParseStringSax, signed_ints, "data/numbers/signed_ints.json");
BENCHMARK_CAPTURE(ParseStringSax, unsigned_ints, "data/numbers/unsigned_ints.json");
//////////////////////////////////////////////////////////////////////////////
// serialize JSON

View file

@ -122,5 +122,137 @@ struct json_sax
virtual ~json_sax() = default;
};
/*!
@brief SAX event handler that assembles the received events into a DOM value

Every value event is inserted at the current position of the hierarchy that
is tracked with a stack of pointers to the currently open arrays/objects.
The assembled value is available via @ref get_value().

@tparam BasicJsonType  the JSON type that is built
*/
template<typename BasicJsonType>
class json_sax_dom_parser : public json_sax<BasicJsonType>
{
  public:
    using number_integer_t = typename BasicJsonType::number_integer_t;
    using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
    using number_float_t = typename BasicJsonType::number_float_t;

    /// @brief store a null value; always returns true
    bool null() override
    {
        handle_value(nullptr);
        return true;
    }

    /// @brief store a boolean value; always returns true
    bool boolean(bool val) override
    {
        handle_value(val);
        return true;
    }

    /// @brief store a signed integer value; always returns true
    bool number_integer(number_integer_t val) override
    {
        handle_value(val);
        return true;
    }

    /// @brief store an unsigned integer value; always returns true
    bool number_unsigned(number_unsigned_t val) override
    {
        handle_value(val);
        return true;
    }

    /// @brief store a floating-point value (the string argument is ignored);
    ///        always returns true
    bool number_float(number_float_t val, const std::string&) override
    {
        handle_value(val);
        return true;
    }

    /// @brief store a string value; always returns true
    bool string(std::string&& val) override
    {
        // val is a named rvalue reference and hence an lvalue here; move it
        // explicitly so the string is not copied into the new value
        handle_value(std::move(val));
        return true;
    }

    /// @brief insert an empty object and make it the current container;
    ///        the size argument is ignored; always returns true
    bool start_object(std::size_t) override
    {
        ref_stack.push_back(handle_value(BasicJsonType::value_t::object));
        return true;
    }

    /// @brief remember the key under which the next value is stored;
    ///        always returns true
    bool key(std::string&& val) override
    {
        // move instead of copy, see string()
        last_key = std::move(val);
        return true;
    }

    /// @brief close the current object; always returns true
    bool end_object() override
    {
        ref_stack.pop_back();
        return true;
    }

    /// @brief insert an empty array and make it the current container;
    ///        the size argument is ignored; always returns true
    bool start_array(std::size_t) override
    {
        ref_stack.push_back(handle_value(BasicJsonType::value_t::array));
        return true;
    }

    /// @brief close the current array; always returns true
    bool end_array() override
    {
        ref_stack.pop_back();
        return true;
    }

    /// @brief binary values are not stored; the event is accepted and dropped
    bool binary(const std::vector<uint8_t>&) override
    {
        return true;
    }

    /*!
    @brief report a parse error by raising a parse_error with id 101

    @param[in] position   position passed on to parse_error::create
    @param[in] error_msg  message passed on to parse_error::create

    @return false; only reached if JSON_THROW returns control (NOTE(review):
            depends on the definition of JSON_THROW, which is not visible
            here)
    */
    bool parse_error(std::size_t position, const std::string&, const std::string& error_msg) override
    {
        JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg));
        return false;
    }

    /// @brief access the value assembled so far
    BasicJsonType& get_value()
    {
        return root;
    }

  private:
    /// the parsed JSON value
    BasicJsonType root;
    /// stack to model hierarchy of values
    std::vector<BasicJsonType*> ref_stack;
    /// helper variable for object keys
    std::string last_key;

    /*!
    @brief insert a value at the current position of the hierarchy

    @param[in] v  value (or value_t tag) from which the new element is built

    @return pointer to the newly stored element

    @invariant If the ref stack is empty, then the passed value will be the new
               root.
    @invariant If the ref stack contains a value, then it is an array or an
               object to which we can add elements
    */
    template<typename Value>
    BasicJsonType* handle_value(Value&& v)
    {
        if (ref_stack.empty())
        {
            assert(root.is_null());
            root = BasicJsonType(std::forward<Value>(v));
            return &root;
        }

        BasicJsonType* const parent = ref_stack.back();
        assert(parent->is_array() or parent->is_object());

        if (parent->is_array())
        {
            // construct the element in place instead of building a
            // temporary that is moved into the array afterwards
            parent->m_value.array->emplace_back(std::forward<Value>(v));
            return &(parent->m_value.array->back());
        }

        // object: store under the key announced by the preceding key() event
        BasicJsonType& r = parent->m_value.object->operator[](last_key) = BasicJsonType(std::forward<Value>(v));
        return &r;
    }
};
}

View file

@ -171,6 +171,8 @@ class basic_json
friend class ::nlohmann::detail::binary_writer;
template<typename BasicJsonType>
friend class ::nlohmann::detail::binary_reader;
// json_sax_dom_parser::handle_value inserts elements via m_value directly
template<typename BasicJsonType>
friend class ::nlohmann::json_sax_dom_parser;
/// workaround type for MSVC
using basic_json_t = NLOHMANN_BASIC_JSON_TPL;

View file

@ -3256,6 +3256,138 @@ struct json_sax
virtual ~json_sax() = default;
};
/*!
@brief SAX event handler that assembles the received events into a DOM value

Every value event is inserted at the current position of the hierarchy that
is tracked with a stack of pointers to the currently open arrays/objects.
The assembled value is available via @ref get_value().

@tparam BasicJsonType  the JSON type that is built
*/
template<typename BasicJsonType>
class json_sax_dom_parser : public json_sax<BasicJsonType>
{
  public:
    using number_integer_t = typename BasicJsonType::number_integer_t;
    using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
    using number_float_t = typename BasicJsonType::number_float_t;

    /// @brief store a null value; always returns true
    bool null() override
    {
        handle_value(nullptr);
        return true;
    }

    /// @brief store a boolean value; always returns true
    bool boolean(bool val) override
    {
        handle_value(val);
        return true;
    }

    /// @brief store a signed integer value; always returns true
    bool number_integer(number_integer_t val) override
    {
        handle_value(val);
        return true;
    }

    /// @brief store an unsigned integer value; always returns true
    bool number_unsigned(number_unsigned_t val) override
    {
        handle_value(val);
        return true;
    }

    /// @brief store a floating-point value (the string argument is ignored);
    ///        always returns true
    bool number_float(number_float_t val, const std::string&) override
    {
        handle_value(val);
        return true;
    }

    /// @brief store a string value; always returns true
    bool string(std::string&& val) override
    {
        // val is a named rvalue reference and hence an lvalue here; move it
        // explicitly so the string is not copied into the new value
        handle_value(std::move(val));
        return true;
    }

    /// @brief insert an empty object and make it the current container;
    ///        the size argument is ignored; always returns true
    bool start_object(std::size_t) override
    {
        ref_stack.push_back(handle_value(BasicJsonType::value_t::object));
        return true;
    }

    /// @brief remember the key under which the next value is stored;
    ///        always returns true
    bool key(std::string&& val) override
    {
        // move instead of copy, see string()
        last_key = std::move(val);
        return true;
    }

    /// @brief close the current object; always returns true
    bool end_object() override
    {
        ref_stack.pop_back();
        return true;
    }

    /// @brief insert an empty array and make it the current container;
    ///        the size argument is ignored; always returns true
    bool start_array(std::size_t) override
    {
        ref_stack.push_back(handle_value(BasicJsonType::value_t::array));
        return true;
    }

    /// @brief close the current array; always returns true
    bool end_array() override
    {
        ref_stack.pop_back();
        return true;
    }

    /// @brief binary values are not stored; the event is accepted and dropped
    bool binary(const std::vector<uint8_t>&) override
    {
        return true;
    }

    /*!
    @brief report a parse error by raising a parse_error with id 101

    @param[in] position   position passed on to parse_error::create
    @param[in] error_msg  message passed on to parse_error::create

    @return false; only reached if JSON_THROW returns control (NOTE(review):
            depends on the definition of JSON_THROW, which is not visible
            here)
    */
    bool parse_error(std::size_t position, const std::string&, const std::string& error_msg) override
    {
        JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg));
        return false;
    }

    /// @brief access the value assembled so far
    BasicJsonType& get_value()
    {
        return root;
    }

  private:
    /// the parsed JSON value
    BasicJsonType root;
    /// stack to model hierarchy of values
    std::vector<BasicJsonType*> ref_stack;
    /// helper variable for object keys
    std::string last_key;

    /*!
    @brief insert a value at the current position of the hierarchy

    @param[in] v  value (or value_t tag) from which the new element is built

    @return pointer to the newly stored element

    @invariant If the ref stack is empty, then the passed value will be the new
               root.
    @invariant If the ref stack contains a value, then it is an array or an
               object to which we can add elements
    */
    template<typename Value>
    BasicJsonType* handle_value(Value&& v)
    {
        if (ref_stack.empty())
        {
            assert(root.is_null());
            root = BasicJsonType(std::forward<Value>(v));
            return &root;
        }

        BasicJsonType* const parent = ref_stack.back();
        assert(parent->is_array() or parent->is_object());

        if (parent->is_array())
        {
            // construct the element in place instead of building a
            // temporary that is moved into the array afterwards
            parent->m_value.array->emplace_back(std::forward<Value>(v));
            return &(parent->m_value.array->back());
        }

        // object: store under the key announced by the preceding key() event
        BasicJsonType& r = parent->m_value.object->operator[](last_key) = BasicJsonType(std::forward<Value>(v));
        return &r;
    }
};
}
@ -10096,6 +10228,8 @@ class basic_json
friend class ::nlohmann::detail::binary_writer;
template<typename BasicJsonType>
friend class ::nlohmann::detail::binary_reader;
// json_sax_dom_parser::handle_value inserts elements via m_value directly
template<typename BasicJsonType>
friend class ::nlohmann::json_sax_dom_parser;
/// workaround type for MSVC
using basic_json_t = NLOHMANN_BASIC_JSON_TPL;

View file

@ -134,129 +134,6 @@ class SaxEventLogger : public nlohmann::json::json_sax_t
bool errored = false;
};
/*!
@brief SAX event handler that assembles the received events into a json value

Every value event is inserted at the current position of the hierarchy that
is tracked with a stack of pointers to the currently open arrays/objects.
The assembled value is available via get_value().
*/
class SaxDomParser : public nlohmann::json::json_sax_t
{
  public:
    /// store a null value; always returns true
    bool null() override
    {
        handle_value(nullptr);
        return true;
    }

    /// store a boolean value; always returns true
    bool boolean(bool val) override
    {
        handle_value(val);
        return true;
    }

    /// store a signed integer value; always returns true
    bool number_integer(json::number_integer_t val) override
    {
        handle_value(val);
        return true;
    }

    /// store an unsigned integer value; always returns true
    bool number_unsigned(json::number_unsigned_t val) override
    {
        handle_value(val);
        return true;
    }

    /// store a floating-point value (the string argument is ignored);
    /// always returns true
    bool number_float(json::number_float_t val, const std::string&) override
    {
        handle_value(val);
        return true;
    }

    /// store a string value; always returns true
    bool string(std::string&& val) override
    {
        // val is a named rvalue reference and hence an lvalue here; move it
        // explicitly so the string is not copied into the new value
        handle_value(std::move(val));
        return true;
    }

    /// insert an empty object and make it the current container; the size
    /// argument is ignored; always returns true
    bool start_object(std::size_t) override
    {
        ref_stack.push_back(handle_value(json::value_t::object));
        return true;
    }

    /// remember the key under which the next value is stored; always
    /// returns true
    bool key(std::string&& val) override
    {
        // move instead of copy, see string()
        last_key = std::move(val);
        return true;
    }

    /// close the current object; always returns true
    bool end_object() override
    {
        ref_stack.pop_back();
        return true;
    }

    /// insert an empty array and make it the current container; the size
    /// argument is ignored; always returns true
    bool start_array(std::size_t) override
    {
        ref_stack.push_back(handle_value(json::value_t::array));
        return true;
    }

    /// close the current array; always returns true
    bool end_array() override
    {
        ref_stack.pop_back();
        return true;
    }

    /// binary values are not stored; the event is accepted and dropped
    bool binary(const std::vector<uint8_t>&) override
    {
        return true;
    }

    /// all arguments are ignored; always returns false
    bool parse_error(std::size_t, const std::string&, const std::string&) override
    {
        return false;
    }

    /// access the value assembled so far
    json& get_value()
    {
        return root;
    }

  private:
    /// the parsed JSON value
    json root;
    /// stack to model hierarchy of values
    std::vector<json*> ref_stack;
    /// helper variable for object keys
    std::string last_key;

    /*!
    @brief insert a value at the current position of the hierarchy

    @param[in] j  value to store; it is moved from

    @return pointer to the newly stored element

    @invariant If the ref stack is empty, then the passed value will be the new
               root.
    @invariant If the ref stack contains a value, then it is an array or an
               object to which we can add elements
    */
    json* handle_value(json&& j)
    {
        if (ref_stack.empty())
        {
            assert(root.is_null());
            // j is a named rvalue reference (an lvalue); move explicitly
            root = std::move(j);
            return &root;
        }

        json* const parent = ref_stack.back();
        assert(parent->is_array() or parent->is_object());

        if (parent->is_array())
        {
            parent->push_back(std::move(j));
            return &(parent->back());
        }

        // object: store under the key announced by the preceding key() event
        json& r = parent->operator[](last_key) = std::move(j);
        return &r;
    }
};
json parser_helper(const std::string& s);
bool accept_helper(const std::string& s);
@ -271,7 +148,7 @@ json parser_helper(const std::string& s)
CHECK_NOTHROW(json::parser(nlohmann::detail::input_adapter(s), nullptr, false).parse(true, j_nothrow));
CHECK(j_nothrow == j);
SaxDomParser sdp;
nlohmann::json_sax_dom_parser<json> sdp;
json::sax_parse(s, &sdp);
CHECK(sdp.get_value() == j);