From 5b9d03cfdbe44e3352c9b293a86ae71e339977de Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 5 Mar 2018 21:06:00 +0100 Subject: [PATCH] :hammer: added SAX-DOM-Parser --- benchmarks/src/benchmarks.cpp | 56 +++++++++ include/nlohmann/detail/input/json_sax.hpp | 132 ++++++++++++++++++++ include/nlohmann/json.hpp | 2 + single_include/nlohmann/json.hpp | 134 +++++++++++++++++++++ test/src/unit-class_parser.cpp | 125 +------------------ 5 files changed, 325 insertions(+), 124 deletions(-) diff --git a/benchmarks/src/benchmarks.cpp b/benchmarks/src/benchmarks.cpp index bebef603..46135e44 100644 --- a/benchmarks/src/benchmarks.cpp +++ b/benchmarks/src/benchmarks.cpp @@ -37,6 +37,35 @@ BENCHMARK_CAPTURE(ParseFile, signed_ints, "data/numbers/signed_ints.json"); BENCHMARK_CAPTURE(ParseFile, unsigned_ints, "data/numbers/unsigned_ints.json"); +static void ParseFileSax(benchmark::State& state, const char* filename) +{ + while (state.KeepRunning()) + { + state.PauseTiming(); + auto* f = new std::ifstream(filename); + auto* sdp = new nlohmann::json_sax_dom_parser(); + state.ResumeTiming(); + + json::sax_parse(*f, sdp); + + state.PauseTiming(); + delete f; + delete sdp; + state.ResumeTiming(); + } + + std::ifstream file(filename, std::ios::binary | std::ios::ate); + state.SetBytesProcessed(state.iterations() * file.tellg()); +} +BENCHMARK_CAPTURE(ParseFileSax, jeopardy, "data/jeopardy/jeopardy.json"); +BENCHMARK_CAPTURE(ParseFileSax, canada, "data/nativejson-benchmark/canada.json"); +BENCHMARK_CAPTURE(ParseFileSax, citm_catalog, "data/nativejson-benchmark/citm_catalog.json"); +BENCHMARK_CAPTURE(ParseFileSax, twitter, "data/nativejson-benchmark/twitter.json"); +BENCHMARK_CAPTURE(ParseFileSax, floats, "data/numbers/floats.json"); +BENCHMARK_CAPTURE(ParseFileSax, signed_ints, "data/numbers/signed_ints.json"); +BENCHMARK_CAPTURE(ParseFileSax, unsigned_ints, "data/numbers/unsigned_ints.json"); + + ////////////////////////////////////////////////////////////////////////////// // parse JSON from string ////////////////////////////////////////////////////////////////////////////// @@ -69,6 +98,33 @@ BENCHMARK_CAPTURE(ParseString, floats, "data/numbers/floats.json"); BENCHMARK_CAPTURE(ParseString, signed_ints, "data/numbers/signed_ints.json"); BENCHMARK_CAPTURE(ParseString, unsigned_ints, "data/numbers/unsigned_ints.json"); +static void ParseStringSax(benchmark::State& state, const char* filename) +{ + std::ifstream f(filename); + std::string str((std::istreambuf_iterator(f)), std::istreambuf_iterator()); + + while (state.KeepRunning()) + { + state.PauseTiming(); + auto* sdp = new nlohmann::json_sax_dom_parser(); + state.ResumeTiming(); + + json::sax_parse(str, sdp); + + state.PauseTiming(); + delete sdp; + state.ResumeTiming(); + } + + state.SetBytesProcessed(state.iterations() * str.size()); +} +BENCHMARK_CAPTURE(ParseStringSax, jeopardy, "data/jeopardy/jeopardy.json"); +BENCHMARK_CAPTURE(ParseStringSax, canada, "data/nativejson-benchmark/canada.json"); +BENCHMARK_CAPTURE(ParseStringSax, citm_catalog, "data/nativejson-benchmark/citm_catalog.json"); +BENCHMARK_CAPTURE(ParseStringSax, twitter, "data/nativejson-benchmark/twitter.json"); +BENCHMARK_CAPTURE(ParseStringSax, floats, "data/numbers/floats.json"); +BENCHMARK_CAPTURE(ParseStringSax, signed_ints, "data/numbers/signed_ints.json"); +BENCHMARK_CAPTURE(ParseStringSax, unsigned_ints, "data/numbers/unsigned_ints.json"); ////////////////////////////////////////////////////////////////////////////// // serialize JSON diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index af2e7f24..38a0a710 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -122,5 +122,137 @@ struct json_sax virtual ~json_sax() = default; }; + + +template +class json_sax_dom_parser : public json_sax +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + + bool null() override + { + handle_value(nullptr); + return true; + } + + bool boolean(bool val) override + { + handle_value(val); + return true; + } + + bool number_integer(number_integer_t val) override + { + handle_value(val); + return true; + } + + bool number_unsigned(number_unsigned_t val) override + { + handle_value(val); + return true; + } + + bool number_float(number_float_t val, const std::string&) override + { + handle_value(val); + return true; + } + + bool string(std::string&& val) override + { + handle_value(val); + return true; + } + + bool start_object(std::size_t) override + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); + return true; + } + + bool key(std::string&& val) override + { + last_key = val; + return true; + } + + bool end_object() override + { + ref_stack.pop_back(); + return true; + } + + bool start_array(std::size_t) override + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); + return true; + } + + bool end_array() override + { + ref_stack.pop_back(); + return true; + } + + bool binary(const std::vector&) override + { + return true; + } + + bool parse_error(std::size_t position, const std::string&, const std::string& error_msg) override + { + JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg)); + return false; + } + + BasicJsonType& get_value() + { + return root; + } + + private: + /// the parsed JSON value + BasicJsonType root; + /// stack to model hierarchy of values + std::vector ref_stack; + /// helper variable for object keys + std::string last_key; + + /*! + @invariant If the ref stack is empty, then the passed value will be the new + root. + @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + */ + template + BasicJsonType* handle_value(Value&& v) + { + if (ref_stack.empty()) + { + assert(root.is_null()); + root = BasicJsonType(std::forward(v)); + return &root; + } + else + { + assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_value.array->push_back(BasicJsonType(std::forward(v))); + return &(ref_stack.back()->m_value.array->back()); + } + else + { + BasicJsonType& r = ref_stack.back()->m_value.object->operator[](last_key) = BasicJsonType(std::forward(v)); + return &r; + } + } + } +}; + } diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index 49cce53d..7eb0a352 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -171,6 +171,8 @@ class basic_json friend class ::nlohmann::detail::binary_writer; template friend class ::nlohmann::detail::binary_reader; + template + friend class ::nlohmann::json_sax_dom_parser; /// workaround type for MSVC using basic_json_t = NLOHMANN_BASIC_JSON_TPL; diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 6138d33a..009f1109 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3256,6 +3256,138 @@ struct json_sax virtual ~json_sax() = default; }; + + +template +class json_sax_dom_parser : public json_sax +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + + bool null() override + { + handle_value(nullptr); + return true; + } + + bool boolean(bool val) override + { + handle_value(val); + return true; + } + + bool number_integer(number_integer_t val) override + { + handle_value(val); + return true; + } + + bool number_unsigned(number_unsigned_t val) override + { + handle_value(val); + return true; + } + + bool number_float(number_float_t val, const std::string&) override + { + handle_value(val); + return true; + } + + bool string(std::string&& val) override + { + handle_value(val); + return true; + } + + bool start_object(std::size_t) override + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); + return true; + } + + bool key(std::string&& val) override + { + last_key = val; + return true; + } + + bool end_object() override + { + ref_stack.pop_back(); + return true; + } + + bool start_array(std::size_t) override + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); + return true; + } + + bool end_array() override + { + ref_stack.pop_back(); + return true; + } + + bool binary(const std::vector&) override + { + return true; + } + + bool parse_error(std::size_t position, const std::string&, const std::string& error_msg) override + { + JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg)); + return false; + } + + BasicJsonType& get_value() + { + return root; + } + + private: + /// the parsed JSON value + BasicJsonType root; + /// stack to model hierarchy of values + std::vector ref_stack; + /// helper variable for object keys + std::string last_key; + + /*! + @invariant If the ref stack is empty, then the passed value will be the new + root. + @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + */ + template + BasicJsonType* handle_value(Value&& v) + { + if (ref_stack.empty()) + { + assert(root.is_null()); + root = BasicJsonType(std::forward(v)); + return &root; + } + else + { + assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_value.array->push_back(BasicJsonType(std::forward(v))); + return &(ref_stack.back()->m_value.array->back()); + } + else + { + BasicJsonType& r = ref_stack.back()->m_value.object->operator[](last_key) = BasicJsonType(std::forward(v)); + return &r; + } + } + } +}; + } @@ -10096,6 +10228,8 @@ class basic_json friend class ::nlohmann::detail::binary_writer; template friend class ::nlohmann::detail::binary_reader; + template + friend class ::nlohmann::json_sax_dom_parser; /// workaround type for MSVC using basic_json_t = NLOHMANN_BASIC_JSON_TPL; diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index 9d40bfeb..a15ad167 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -134,129 +134,6 @@ class SaxEventLogger : public nlohmann::json::json_sax_t bool errored = false; }; -class SaxDomParser : public nlohmann::json::json_sax_t -{ - public: - bool null() override - { - handle_value(nullptr); - return true; - } - - bool boolean(bool val) override - { - handle_value(val); - return true; - } - - bool number_integer(json::number_integer_t val) override - { - handle_value(val); - return true; - } - - bool number_unsigned(json::number_unsigned_t val) override - { - handle_value(val); - return true; - } - - bool number_float(json::number_float_t val, const std::string&) override - { - handle_value(val); - return true; - } - - bool string(std::string&& val) override - { - handle_value(val); - return true; - } - - bool start_object(std::size_t) override - { - ref_stack.push_back(handle_value(json::value_t::object)); - return true; - } - - bool key(std::string&& val) override - { - last_key = val; - return true; - } - - bool end_object() override - { - ref_stack.pop_back(); - return true; - } - - bool start_array(std::size_t) override - { - ref_stack.push_back(handle_value(json::value_t::array)); - return true; - } - - bool end_array() override - { - ref_stack.pop_back(); - return true; - } - - bool binary(const std::vector&) override - { - return true; - } - - bool parse_error(std::size_t position, const std::string&, const std::string&) override - { - return false; - } - - json& get_value() - { - return root; - } - - private: - /// the parsed JSON value - json root; - /// stack to model hierarchy of values - std::vector ref_stack; - /// helper variable for object keys - std::string last_key; - - /*! - @invariant If the ref stack is empty, then the passed value will be the new - root. - @invariant If the ref stack contains a value, then it is an array or an - object to which we can add elements - */ - json* handle_value(json&& j) - { - if (ref_stack.empty()) - { - assert(root.is_null()); - root = j; - return &root; - } - else - { - assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); - if (ref_stack.back()->is_array()) - { - ref_stack.back()->push_back(j); - return &(ref_stack.back()->back()); - } - else - { - json& r = ref_stack.back()->operator[](last_key) = j; - return &r; - } - } - } -}; - json parser_helper(const std::string& s); bool accept_helper(const std::string& s); @@ -271,7 +148,7 @@ json parser_helper(const std::string& s) CHECK_NOTHROW(json::parser(nlohmann::detail::input_adapter(s), nullptr, false).parse(true, j_nothrow)); CHECK(j_nothrow == j); - SaxDomParser sdp; + nlohmann::json_sax_dom_parser sdp; json::sax_parse(s, &sdp); CHECK(sdp.get_value() == j);