From 5b9d03cfdbe44e3352c9b293a86ae71e339977de Mon Sep 17 00:00:00 2001
From: Niels Lohmann <mail@nlohmann.me>
Date: Mon, 5 Mar 2018 21:06:00 +0100
Subject: [PATCH] :hammer: added SAX-DOM-Parser

---
 benchmarks/src/benchmarks.cpp              |  56 +++++++++
 include/nlohmann/detail/input/json_sax.hpp | 132 ++++++++++++++++++++
 include/nlohmann/json.hpp                  |   2 +
 single_include/nlohmann/json.hpp           | 134 +++++++++++++++++++++
 test/src/unit-class_parser.cpp             | 125 +------------------
 5 files changed, 325 insertions(+), 124 deletions(-)

diff --git a/benchmarks/src/benchmarks.cpp b/benchmarks/src/benchmarks.cpp
index bebef603..46135e44 100644
--- a/benchmarks/src/benchmarks.cpp
+++ b/benchmarks/src/benchmarks.cpp
@@ -37,6 +37,35 @@ BENCHMARK_CAPTURE(ParseFile, signed_ints,   "data/numbers/signed_ints.json");
 BENCHMARK_CAPTURE(ParseFile, unsigned_ints, "data/numbers/unsigned_ints.json");
 
 
+static void ParseFileSax(benchmark::State& state, const char* filename)
+{
+    while (state.KeepRunning())
+    {
+        state.PauseTiming();  // setup is excluded from the measurement
+        auto* input = new std::ifstream(filename);
+        auto* handler = new nlohmann::json_sax_dom_parser<json>();
+        state.ResumeTiming();
+
+        json::sax_parse(*input, handler);
+
+        state.PauseTiming();  // so is teardown
+        delete input;
+        delete handler;
+        state.ResumeTiming();
+    }
+    // the stream is opened at its end, so tellg() yields the file size in bytes
+    std::ifstream sized(filename, std::ios::binary | std::ios::ate);
+    state.SetBytesProcessed(state.iterations() * sized.tellg());
+}
+BENCHMARK_CAPTURE(ParseFileSax, jeopardy,      "data/jeopardy/jeopardy.json");
+BENCHMARK_CAPTURE(ParseFileSax, canada,        "data/nativejson-benchmark/canada.json");
+BENCHMARK_CAPTURE(ParseFileSax, citm_catalog,  "data/nativejson-benchmark/citm_catalog.json");
+BENCHMARK_CAPTURE(ParseFileSax, twitter,       "data/nativejson-benchmark/twitter.json");
+BENCHMARK_CAPTURE(ParseFileSax, floats,        "data/numbers/floats.json");
+BENCHMARK_CAPTURE(ParseFileSax, signed_ints,   "data/numbers/signed_ints.json");
+BENCHMARK_CAPTURE(ParseFileSax, unsigned_ints, "data/numbers/unsigned_ints.json");
+
+
 //////////////////////////////////////////////////////////////////////////////
 // parse JSON from string
 //////////////////////////////////////////////////////////////////////////////
@@ -69,6 +98,33 @@ BENCHMARK_CAPTURE(ParseString, floats,        "data/numbers/floats.json");
 BENCHMARK_CAPTURE(ParseString, signed_ints,   "data/numbers/signed_ints.json");
 BENCHMARK_CAPTURE(ParseString, unsigned_ints, "data/numbers/unsigned_ints.json");
 
+static void ParseStringSax(benchmark::State& state, const char* filename)
+{
+    std::ifstream input(filename);
+    std::string text((std::istreambuf_iterator<char>(input)), std::istreambuf_iterator<char>());
+    // the file is read once up front; every iteration parses the same string
+    while (state.KeepRunning())
+    {
+        state.PauseTiming();  // handler construction is excluded from the measurement
+        auto* handler = new nlohmann::json_sax_dom_parser<json>();
+        state.ResumeTiming();
+
+        json::sax_parse(text, handler);
+
+        state.PauseTiming();  // so is its destruction
+        delete handler;
+        state.ResumeTiming();
+    }
+
+    state.SetBytesProcessed(state.iterations() * text.size());
+}
+BENCHMARK_CAPTURE(ParseStringSax, jeopardy,      "data/jeopardy/jeopardy.json");
+BENCHMARK_CAPTURE(ParseStringSax, canada,        "data/nativejson-benchmark/canada.json");
+BENCHMARK_CAPTURE(ParseStringSax, citm_catalog,  "data/nativejson-benchmark/citm_catalog.json");
+BENCHMARK_CAPTURE(ParseStringSax, twitter,       "data/nativejson-benchmark/twitter.json");
+BENCHMARK_CAPTURE(ParseStringSax, floats,        "data/numbers/floats.json");
+BENCHMARK_CAPTURE(ParseStringSax, signed_ints,   "data/numbers/signed_ints.json");
+BENCHMARK_CAPTURE(ParseStringSax, unsigned_ints, "data/numbers/unsigned_ints.json");
 
 //////////////////////////////////////////////////////////////////////////////
 // serialize JSON
diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp
index af2e7f24..38a0a710 100644
--- a/include/nlohmann/detail/input/json_sax.hpp
+++ b/include/nlohmann/detail/input/json_sax.hpp
@@ -122,5 +122,137 @@ struct json_sax
 
     virtual ~json_sax() = default;
 };
+
+
+/*!
+@brief SAX handler that builds a DOM (a BasicJsonType value) from the events
+@note binary() discards its argument; parse_error() reports error 101 via JSON_THROW
+*/
+template<typename BasicJsonType>
+class json_sax_dom_parser : public json_sax<BasicJsonType>
+{
+  public:
+    using number_integer_t = typename BasicJsonType::number_integer_t;
+    using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
+    using number_float_t = typename BasicJsonType::number_float_t;
+
+    bool null() override
+    {
+        handle_value(nullptr);
+        return true;
+    }
+
+    bool boolean(bool val) override
+    {
+        handle_value(val);
+        return true;
+    }
+
+    bool number_integer(number_integer_t val) override
+    {
+        handle_value(val);
+        return true;
+    }
+
+    bool number_unsigned(number_unsigned_t val) override
+    {
+        handle_value(val);
+        return true;
+    }
+
+    bool number_float(number_float_t val, const std::string&) override
+    {
+        handle_value(val);
+        return true;
+    }
+
+    bool string(std::string&& val) override
+    {
+        handle_value(std::move(val));  // rvalue parameter: move it into the DOM instead of copying
+        return true;
+    }
+
+    bool start_object(std::size_t) override
+    {
+        ref_stack.push_back(handle_value(BasicJsonType::value_t::object));
+        return true;
+    }
+
+    bool key(std::string&& val) override
+    {
+        last_key = std::move(val);  // kept until the member's value arrives
+        return true;
+    }
+
+    bool end_object() override
+    {
+        ref_stack.pop_back();
+        return true;
+    }
+
+    bool start_array(std::size_t) override
+    {
+        ref_stack.push_back(handle_value(BasicJsonType::value_t::array));
+        return true;
+    }
+
+    bool end_array() override
+    {
+        ref_stack.pop_back();
+        return true;
+    }
+
+    bool binary(const std::vector<uint8_t>&) override
+    {
+        return true;
+    }
+
+    bool parse_error(std::size_t position, const std::string&, const std::string& error_msg) override
+    {
+        JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg));
+        return false;
+    }
+
+    BasicJsonType& get_value()
+    {
+        return root;
+    }
+
+  private:
+    /// the parsed JSON value
+    BasicJsonType root;
+    /// stack to model hierarchy of values
+    std::vector<BasicJsonType*> ref_stack;
+    /// helper variable for object keys
+    std::string last_key;
+
+    /*!
+    @brief store @a v in the DOM and return a pointer to the stored value
+    @invariant If the ref stack is empty, the passed value becomes the new root.
+    @invariant If the ref stack contains a value, then it is an array or an
+               object to which we can add elements
+    */
+    template<typename Value>
+    BasicJsonType* handle_value(Value&& v)
+    {
+        if (ref_stack.empty())
+        {
+            assert(root.is_null());
+            root = BasicJsonType(std::forward<Value>(v));
+            return &root;
+        }
+
+        assert(ref_stack.back()->is_array() or ref_stack.back()->is_object());
+        if (ref_stack.back()->is_array())
+        {
+            ref_stack.back()->m_value.array->emplace_back(std::forward<Value>(v));
+            return &(ref_stack.back()->m_value.array->back());
+        }
+
+        BasicJsonType& r = ref_stack.back()->m_value.object->operator[](last_key) = BasicJsonType(std::forward<Value>(v));
+        return &r;
+    }
+};
+
 }
 
diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp
index 49cce53d..7eb0a352 100644
--- a/include/nlohmann/json.hpp
+++ b/include/nlohmann/json.hpp
@@ -171,6 +171,8 @@ class basic_json
     friend class ::nlohmann::detail::binary_writer;
     template<typename BasicJsonType>
     friend class ::nlohmann::detail::binary_reader;
+    template<typename BasicJsonType>
+    friend class ::nlohmann::json_sax_dom_parser;
 
     /// workaround type for MSVC
     using basic_json_t = NLOHMANN_BASIC_JSON_TPL;
diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp
index 6138d33a..009f1109 100644
--- a/single_include/nlohmann/json.hpp
+++ b/single_include/nlohmann/json.hpp
@@ -3256,6 +3256,138 @@ struct json_sax
 
     virtual ~json_sax() = default;
 };
+
+
+/*!
+@brief SAX handler that builds a DOM (a BasicJsonType value) from the events
+@note binary() discards its argument; parse_error() reports error 101 via JSON_THROW
+*/
+template<typename BasicJsonType>
+class json_sax_dom_parser : public json_sax<BasicJsonType>
+{
+  public:
+    using number_integer_t = typename BasicJsonType::number_integer_t;
+    using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
+    using number_float_t = typename BasicJsonType::number_float_t;
+
+    bool null() override
+    {
+        handle_value(nullptr);
+        return true;
+    }
+
+    bool boolean(bool val) override
+    {
+        handle_value(val);
+        return true;
+    }
+
+    bool number_integer(number_integer_t val) override
+    {
+        handle_value(val);
+        return true;
+    }
+
+    bool number_unsigned(number_unsigned_t val) override
+    {
+        handle_value(val);
+        return true;
+    }
+
+    bool number_float(number_float_t val, const std::string&) override
+    {
+        handle_value(val);
+        return true;
+    }
+
+    bool string(std::string&& val) override
+    {
+        handle_value(std::move(val));  // rvalue parameter: move it into the DOM instead of copying
+        return true;
+    }
+
+    bool start_object(std::size_t) override
+    {
+        ref_stack.push_back(handle_value(BasicJsonType::value_t::object));
+        return true;
+    }
+
+    bool key(std::string&& val) override
+    {
+        last_key = std::move(val);  // kept until the member's value arrives
+        return true;
+    }
+
+    bool end_object() override
+    {
+        ref_stack.pop_back();
+        return true;
+    }
+
+    bool start_array(std::size_t) override
+    {
+        ref_stack.push_back(handle_value(BasicJsonType::value_t::array));
+        return true;
+    }
+
+    bool end_array() override
+    {
+        ref_stack.pop_back();
+        return true;
+    }
+
+    bool binary(const std::vector<uint8_t>&) override
+    {
+        return true;
+    }
+
+    bool parse_error(std::size_t position, const std::string&, const std::string& error_msg) override
+    {
+        JSON_THROW(BasicJsonType::parse_error::create(101, position, error_msg));
+        return false;
+    }
+
+    BasicJsonType& get_value()
+    {
+        return root;
+    }
+
+  private:
+    /// the parsed JSON value
+    BasicJsonType root;
+    /// stack to model hierarchy of values
+    std::vector<BasicJsonType*> ref_stack;
+    /// helper variable for object keys
+    std::string last_key;
+
+    /*!
+    @brief store @a v in the DOM and return a pointer to the stored value
+    @invariant If the ref stack is empty, the passed value becomes the new root.
+    @invariant If the ref stack contains a value, then it is an array or an
+               object to which we can add elements
+    */
+    template<typename Value>
+    BasicJsonType* handle_value(Value&& v)
+    {
+        if (ref_stack.empty())
+        {
+            assert(root.is_null());
+            root = BasicJsonType(std::forward<Value>(v));
+            return &root;
+        }
+
+        assert(ref_stack.back()->is_array() or ref_stack.back()->is_object());
+        if (ref_stack.back()->is_array())
+        {
+            ref_stack.back()->m_value.array->emplace_back(std::forward<Value>(v));
+            return &(ref_stack.back()->m_value.array->back());
+        }
+
+        BasicJsonType& r = ref_stack.back()->m_value.object->operator[](last_key) = BasicJsonType(std::forward<Value>(v));
+        return &r;
+    }
+};
+
 }
 
 
@@ -10096,6 +10228,8 @@ class basic_json
     friend class ::nlohmann::detail::binary_writer;
     template<typename BasicJsonType>
     friend class ::nlohmann::detail::binary_reader;
+    template<typename BasicJsonType>
+    friend class ::nlohmann::json_sax_dom_parser;
 
     /// workaround type for MSVC
     using basic_json_t = NLOHMANN_BASIC_JSON_TPL;
diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp
index 9d40bfeb..a15ad167 100644
--- a/test/src/unit-class_parser.cpp
+++ b/test/src/unit-class_parser.cpp
@@ -134,129 +134,6 @@ class SaxEventLogger : public nlohmann::json::json_sax_t
     bool errored = false;
 };
 
-class SaxDomParser : public nlohmann::json::json_sax_t
-{
-  public:
-    bool null() override
-    {
-        handle_value(nullptr);
-        return true;
-    }
-
-    bool boolean(bool val) override
-    {
-        handle_value(val);
-        return true;
-    }
-
-    bool number_integer(json::number_integer_t val) override
-    {
-        handle_value(val);
-        return true;
-    }
-
-    bool number_unsigned(json::number_unsigned_t val) override
-    {
-        handle_value(val);
-        return true;
-    }
-
-    bool number_float(json::number_float_t val, const std::string&) override
-    {
-        handle_value(val);
-        return true;
-    }
-
-    bool string(std::string&& val) override
-    {
-        handle_value(val);
-        return true;
-    }
-
-    bool start_object(std::size_t) override
-    {
-        ref_stack.push_back(handle_value(json::value_t::object));
-        return true;
-    }
-
-    bool key(std::string&& val) override
-    {
-        last_key = val;
-        return true;
-    }
-
-    bool end_object() override
-    {
-        ref_stack.pop_back();
-        return true;
-    }
-
-    bool start_array(std::size_t) override
-    {
-        ref_stack.push_back(handle_value(json::value_t::array));
-        return true;
-    }
-
-    bool end_array() override
-    {
-        ref_stack.pop_back();
-        return true;
-    }
-
-    bool binary(const std::vector<uint8_t>&) override
-    {
-        return true;
-    }
-
-    bool parse_error(std::size_t position, const std::string&, const std::string&) override
-    {
-        return false;
-    }
-
-    json& get_value()
-    {
-        return root;
-    }
-
-  private:
-    /// the parsed JSON value
-    json root;
-    /// stack to model hierarchy of values
-    std::vector<json*> ref_stack;
-    /// helper variable for object keys
-    std::string last_key;
-
-    /*!
-    @invariant If the ref stack is empty, then the passed value will be the new
-               root.
-    @invariant If the ref stack contains a value, then it is an array or an
-               object to which we can add elements
-    */
-    json* handle_value(json&& j)
-    {
-        if (ref_stack.empty())
-        {
-            assert(root.is_null());
-            root = j;
-            return &root;
-        }
-        else
-        {
-            assert(ref_stack.back()->is_array() or ref_stack.back()->is_object());
-            if (ref_stack.back()->is_array())
-            {
-                ref_stack.back()->push_back(j);
-                return &(ref_stack.back()->back());
-            }
-            else
-            {
-                json& r = ref_stack.back()->operator[](last_key) = j;
-                return &r;
-            }
-        }
-    }
-};
-
 json parser_helper(const std::string& s);
 bool accept_helper(const std::string& s);
 
@@ -271,7 +148,7 @@ json parser_helper(const std::string& s)
     CHECK_NOTHROW(json::parser(nlohmann::detail::input_adapter(s), nullptr, false).parse(true, j_nothrow));
     CHECK(j_nothrow == j);
 
-    SaxDomParser sdp;
+    nlohmann::json_sax_dom_parser<json> sdp;
     json::sax_parse(s, &sdp);
     CHECK(sdp.get_value() == j);