From 9dbb4402fb82e0b7690543fe8718f515503672a3 Mon Sep 17 00:00:00 2001 From: Niels Date: Wed, 3 Jun 2015 23:34:10 +0200 Subject: [PATCH] improved performance for dump() --- Makefile | 2 +- appveyor.yml | 2 +- benchmarks/benchmarks.cpp | 10 +++- src/json.hpp | 122 ++++++++++++++++++++------------------ src/json.hpp.re2c | 122 ++++++++++++++++++++------------------ test/unit.cpp | 83 ++++++++++++++------------ 6 files changed, 185 insertions(+), 156 deletions(-) diff --git a/Makefile b/Makefile index 2fb0cf93..e2ad6094 100644 --- a/Makefile +++ b/Makefile @@ -33,6 +33,6 @@ pretty: src/json.hpp src/json.hpp.re2c test/unit.cpp benchmarks/benchmarks.cpp # benchmarks -json_benchmarks: benchmarks/benchmarks.cpp benchmarks/benchpress.hpp benchmarks/cxxopts.hpp +json_benchmarks: benchmarks/benchmarks.cpp benchmarks/benchpress.hpp benchmarks/cxxopts.hpp src/json.hpp $(CXX) -std=c++11 $(CXXFLAGS) -O3 -flto -I src -I benchmarks $< $(LDFLAGS) -o $@ ./json_benchmarks diff --git a/appveyor.yml b/appveyor.yml index 80aa46fe..45f155db 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,5 +1,5 @@ version: '{build}' -os: Visual Studio 2015 RC +os: MinGW init: [] install: [] build_script: diff --git a/benchmarks/benchmarks.cpp b/benchmarks/benchmarks.cpp index da47cb79..1f5eb5e3 100644 --- a/benchmarks/benchmarks.cpp +++ b/benchmarks/benchmarks.cpp @@ -49,12 +49,15 @@ BENCHMARK("dump jeopardy.json", [](benchpress::context* ctx) std::ifstream input_file("benchmarks/files/jeopardy/jeopardy.json"); nlohmann::json j; j << input_file; + std::ofstream output_file("jeopardy.dump.json"); ctx->reset_timer(); for (size_t i = 0; i < ctx->num_iterations(); ++i) { - j.dump(); + output_file << j; } + + std::remove("jeopardy.dump.json"); }) BENCHMARK("dump jeopardy.json with indent", [](benchpress::context* ctx) @@ -62,10 +65,13 @@ BENCHMARK("dump jeopardy.json with indent", [](benchpress::context* ctx) std::ifstream input_file("benchmarks/files/jeopardy/jeopardy.json"); nlohmann::json j; j << input_file; + std::ofstream output_file("jeopardy.dump.json"); ctx->reset_timer(); for (size_t i = 0; i < ctx->num_iterations(); ++i) { - j.dump(4); + output_file << std::setw(4) << j; } + + std::remove("jeopardy.dump.json"); }) diff --git a/src/json.hpp b/src/json.hpp index 0fa57b1c..030d955e 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -779,14 +779,18 @@ class basic_json */ inline string_t dump(const int indent = -1) const noexcept { + std::stringstream ss; + if (indent >= 0) { - return dump(true, static_cast(indent)); + dump(ss, true, static_cast(indent)); } else { - return dump(false, 0); + dump(ss, false, 0); } + + return ss.str(); } /// return the type of the object (explicit) @@ -1964,19 +1968,21 @@ class basic_json friend std::ostream& operator<<(std::ostream& o, const basic_json& j) { // read width member and use it as indentation parameter if nonzero - const int indentation = (o.width() == 0) ? -1 : o.width(); + const bool prettyPrint = (o.width() > 0); + const auto indentation = (prettyPrint ? o.width() : 0); - o << j.dump(indentation); + // reset width to 0 for subsequent calls to this stream + o.width(0); + + // do the actual serialization + j.dump(o, prettyPrint, indentation); return o; } /// serialize to stream friend std::ostream& operator>>(const basic_json& j, std::ostream& o) { - // read width member and use it as indentation parameter if nonzero - const int indentation = (o.width() == 0) ? -1 : o.width(); - - o << j.dump(indentation); + o << j; return o; } @@ -2067,15 +2073,11 @@ class basic_json characters by a sequence of "\u" followed by a four-digit hex representation. + @param o the stream to write the escaped string to @param s the string to escape - @return escaped string */ - static string_t escape_string(const string_t& s) noexcept + static void escape_string(std::ostream& o, const string_t& s) noexcept { - // create a result string of at least the size than s - string_t result; - result.reserve(s.size()); - for (const auto c : s) { switch (c) @@ -2083,49 +2085,49 @@ class basic_json // quotation mark (0x22) case '"': { - result += "\\\""; + o << "\\\""; break; } // reverse solidus (0x5c) case '\\': { - result += "\\\\"; + o << "\\\\"; break; } // backspace (0x08) case '\b': { - result += "\\b"; + o << "\\b"; break; } // formfeed (0x0c) case '\f': { - result += "\\f"; + o << "\\f"; break; } // newline (0x0a) case '\n': { - result += "\\n"; + o << "\\n"; break; } // carriage return (0x0d) case '\r': { - result += "\\r"; + o << "\\r"; break; } // horizontal tab (0x09) case '\t': { - result += "\\t"; + o << "\\t"; break; } @@ -2135,24 +2137,19 @@ class basic_json { // control characters (everything between 0x00 and 0x1f) // -> create four-digit hex representation - std::basic_stringstream ss; - ss << "\\u" << std::hex << std::setw(4) << std::setfill('0') << int(c); - result += ss.str(); + o << "\\u" << std::hex << std::setw(4) << std::setfill('0') << int(c); } else { // all other characters are added as-is - result.append(1, c); + o << c; } break; } } } - - return result; } - /*! @brief internal implementation of the serialization function @@ -2166,108 +2163,115 @@ class basic_json std::to_string() - floating-point numbers are converted to a string using "%g" format + @param o stream to write to @param prettyPrint whether the output shall be pretty-printed @param indentStep the indent level @param currentIndent the current indent level (only used internally) */ - inline string_t dump(const bool prettyPrint, const unsigned int indentStep, - const unsigned int currentIndent = 0) const noexcept + inline void dump(std::ostream& o, const bool prettyPrint, const unsigned int indentStep, + const unsigned int currentIndent = 0) const noexcept { // variable to hold indentation for recursive calls auto new_indent = currentIndent; - // helper function to return whitespace as indentation - const auto indent = [prettyPrint, &new_indent]() - { - return prettyPrint ? string_t(new_indent, ' ') : string_t(); - }; - switch (m_type) { case (value_t::object): { if (m_value.object->empty()) { - return "{}"; + o << "{}"; + return; } - string_t result = "{"; + o << "{"; // increase indentation if (prettyPrint) { new_indent += indentStep; - result += "\n"; + o << "\n"; } for (auto i = m_value.object->cbegin(); i != m_value.object->cend(); ++i) { if (i != m_value.object->cbegin()) { - result += prettyPrint ? ",\n" : ","; + o << (prettyPrint ? ",\n" : ","); } - result += indent() + "\"" + escape_string(i->first) + "\":" + (prettyPrint ? " " : "") - + i->second.dump(prettyPrint, indentStep, new_indent); + o << string_t(new_indent, ' ') << "\""; + escape_string(o, i->first); + o << "\":" << (prettyPrint ? " " : ""); + i->second.dump(o, prettyPrint, indentStep, new_indent); } // decrease indentation if (prettyPrint) { new_indent -= indentStep; - result += "\n"; + o << "\n"; } - return result + indent() + "}"; + o << string_t(new_indent, ' ') + "}"; + return; } case (value_t::array): { if (m_value.array->empty()) { - return "[]"; + o << "[]"; + return; } - string_t result = "["; + o << "["; // increase indentation if (prettyPrint) { new_indent += indentStep; - result += "\n"; + o << "\n"; } for (auto i = m_value.array->cbegin(); i != m_value.array->cend(); ++i) { if (i != m_value.array->cbegin()) { - result += prettyPrint ? ",\n" : ","; + o << (prettyPrint ? ",\n" : ","); } - result += indent() + i->dump(prettyPrint, indentStep, new_indent); + o << string_t(new_indent, ' '); + i->dump(o, prettyPrint, indentStep, new_indent); } // decrease indentation if (prettyPrint) { new_indent -= indentStep; - result += "\n"; + o << "\n"; } - return result + indent() + "]"; + o << string_t(new_indent, ' ') << "]"; + return; } case (value_t::string): { - return string_t("\"") + escape_string(*m_value.string) + "\""; + o << string_t("\""); + escape_string(o, *m_value.string); + o << "\""; + return; } case (value_t::boolean): { - return m_value.boolean ? "true" : "false"; + o << (m_value.boolean ? "true" : "false"); + return; } case (value_t::number_integer): { - return std::to_string(m_value.number_integer); + o << m_value.number_integer; + return; } case (value_t::number_float): @@ -2277,16 +2281,20 @@ class basic_json const auto sz = static_cast(std::snprintf(nullptr, 0, "%.15g", m_value.number_float)); std::vector buf(sz + 1); std::snprintf(&buf[0], buf.size(), "%.15g", m_value.number_float); - return string_t(buf.data()); + o << buf.data(); + return; } case (value_t::discarded): { - return ""; + o << ""; + return; } + default: { - return "null"; + o << "null"; + return; } } } diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index 003017de..84f5aaa0 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -779,14 +779,18 @@ class basic_json */ inline string_t dump(const int indent = -1) const noexcept { + std::stringstream ss; + if (indent >= 0) { - return dump(true, static_cast(indent)); + dump(ss, true, static_cast(indent)); } else { - return dump(false, 0); + dump(ss, false, 0); } + + return ss.str(); } /// return the type of the object (explicit) @@ -1964,19 +1968,21 @@ class basic_json friend std::ostream& operator<<(std::ostream& o, const basic_json& j) { // read width member and use it as indentation parameter if nonzero - const int indentation = (o.width() == 0) ? -1 : o.width(); + const bool prettyPrint = (o.width() > 0); + const auto indentation = (prettyPrint ? o.width() : 0); - o << j.dump(indentation); + // reset width to 0 for subsequent calls to this stream + o.width(0); + + // do the actual serialization + j.dump(o, prettyPrint, indentation); return o; } /// serialize to stream friend std::ostream& operator>>(const basic_json& j, std::ostream& o) { - // read width member and use it as indentation parameter if nonzero - const int indentation = (o.width() == 0) ? -1 : o.width(); - - o << j.dump(indentation); + o << j; return o; } @@ -2067,15 +2073,11 @@ class basic_json characters by a sequence of "\u" followed by a four-digit hex representation. + @param o the stream to write the escaped string to @param s the string to escape - @return escaped string */ - static string_t escape_string(const string_t& s) noexcept + static void escape_string(std::ostream& o, const string_t& s) noexcept { - // create a result string of at least the size than s - string_t result; - result.reserve(s.size()); - for (const auto c : s) { switch (c) @@ -2083,49 +2085,49 @@ class basic_json // quotation mark (0x22) case '"': { - result += "\\\""; + o << "\\\""; break; } // reverse solidus (0x5c) case '\\': { - result += "\\\\"; + o << "\\\\"; break; } // backspace (0x08) case '\b': { - result += "\\b"; + o << "\\b"; break; } // formfeed (0x0c) case '\f': { - result += "\\f"; + o << "\\f"; break; } // newline (0x0a) case '\n': { - result += "\\n"; + o << "\\n"; break; } // carriage return (0x0d) case '\r': { - result += "\\r"; + o << "\\r"; break; } // horizontal tab (0x09) case '\t': { - result += "\\t"; + o << "\\t"; break; } @@ -2135,24 +2137,19 @@ class basic_json { // control characters (everything between 0x00 and 0x1f) // -> create four-digit hex representation - std::basic_stringstream ss; - ss << "\\u" << std::hex << std::setw(4) << std::setfill('0') << int(c); - result += ss.str(); + o << "\\u" << std::hex << std::setw(4) << std::setfill('0') << int(c); } else { // all other characters are added as-is - result.append(1, c); + o << c; } break; } } } - - return result; } - /*! @brief internal implementation of the serialization function @@ -2166,108 +2163,115 @@ class basic_json std::to_string() - floating-point numbers are converted to a string using "%g" format + @param o stream to write to @param prettyPrint whether the output shall be pretty-printed @param indentStep the indent level @param currentIndent the current indent level (only used internally) */ - inline string_t dump(const bool prettyPrint, const unsigned int indentStep, - const unsigned int currentIndent = 0) const noexcept + inline void dump(std::ostream& o, const bool prettyPrint, const unsigned int indentStep, + const unsigned int currentIndent = 0) const noexcept { // variable to hold indentation for recursive calls auto new_indent = currentIndent; - // helper function to return whitespace as indentation - const auto indent = [prettyPrint, &new_indent]() - { - return prettyPrint ? string_t(new_indent, ' ') : string_t(); - }; - switch (m_type) { case (value_t::object): { if (m_value.object->empty()) { - return "{}"; + o << "{}"; + return; } - string_t result = "{"; + o << "{"; // increase indentation if (prettyPrint) { new_indent += indentStep; - result += "\n"; + o << "\n"; } for (auto i = m_value.object->cbegin(); i != m_value.object->cend(); ++i) { if (i != m_value.object->cbegin()) { - result += prettyPrint ? ",\n" : ","; + o << (prettyPrint ? ",\n" : ","); } - result += indent() + "\"" + escape_string(i->first) + "\":" + (prettyPrint ? " " : "") - + i->second.dump(prettyPrint, indentStep, new_indent); + o << string_t(new_indent, ' ') << "\""; + escape_string(o, i->first); + o << "\":" << (prettyPrint ? " " : ""); + i->second.dump(o, prettyPrint, indentStep, new_indent); } // decrease indentation if (prettyPrint) { new_indent -= indentStep; - result += "\n"; + o << "\n"; } - return result + indent() + "}"; + o << string_t(new_indent, ' ') + "}"; + return; } case (value_t::array): { if (m_value.array->empty()) { - return "[]"; + o << "[]"; + return; } - string_t result = "["; + o << "["; // increase indentation if (prettyPrint) { new_indent += indentStep; - result += "\n"; + o << "\n"; } for (auto i = m_value.array->cbegin(); i != m_value.array->cend(); ++i) { if (i != m_value.array->cbegin()) { - result += prettyPrint ? ",\n" : ","; + o << (prettyPrint ? ",\n" : ","); } - result += indent() + i->dump(prettyPrint, indentStep, new_indent); + o << string_t(new_indent, ' '); + i->dump(o, prettyPrint, indentStep, new_indent); } // decrease indentation if (prettyPrint) { new_indent -= indentStep; - result += "\n"; + o << "\n"; } - return result + indent() + "]"; + o << string_t(new_indent, ' ') << "]"; + return; } case (value_t::string): { - return string_t("\"") + escape_string(*m_value.string) + "\""; + o << string_t("\""); + escape_string(o, *m_value.string); + o << "\""; + return; } case (value_t::boolean): { - return m_value.boolean ? "true" : "false"; + o << (m_value.boolean ? "true" : "false"); + return; } case (value_t::number_integer): { - return std::to_string(m_value.number_integer); + o << m_value.number_integer; + return; } case (value_t::number_float): @@ -2277,16 +2281,20 @@ class basic_json const auto sz = static_cast(std::snprintf(nullptr, 0, "%.15g", m_value.number_float)); std::vector buf(sz + 1); std::snprintf(&buf[0], buf.size(), "%.15g", m_value.number_float); - return string_t(buf.data()); + o << buf.data(); + return; } case (value_t::discarded): { - return ""; + o << ""; + return; } + default: { - return "null"; + o << "null"; + return; } } } diff --git a/test/unit.cpp b/test/unit.cpp index 6e5e468e..5d3f1f3c 100644 --- a/test/unit.cpp +++ b/test/unit.cpp @@ -7162,45 +7162,52 @@ TEST_CASE("convenience functions") SECTION("string escape") { - CHECK(json::escape_string("\"") == "\\\""); - CHECK(json::escape_string("\\") == "\\\\"); - CHECK(json::escape_string("\b") == "\\b"); - CHECK(json::escape_string("\f") == "\\f"); - CHECK(json::escape_string("\n") == "\\n"); - CHECK(json::escape_string("\r") == "\\r"); - CHECK(json::escape_string("\t") == "\\t"); + auto escape_string = [](const std::string & s) + { + std::stringstream ss; + json::escape_string(ss, s); + return ss.str(); + }; - CHECK(json::escape_string("\x01") == "\\u0001"); - CHECK(json::escape_string("\x02") == "\\u0002"); - CHECK(json::escape_string("\x03") == "\\u0003"); - CHECK(json::escape_string("\x04") == "\\u0004"); - CHECK(json::escape_string("\x05") == "\\u0005"); - CHECK(json::escape_string("\x06") == "\\u0006"); - CHECK(json::escape_string("\x07") == "\\u0007"); - CHECK(json::escape_string("\x08") == "\\b"); - CHECK(json::escape_string("\x09") == "\\t"); - CHECK(json::escape_string("\x0a") == "\\n"); - CHECK(json::escape_string("\x0b") == "\\u000b"); - CHECK(json::escape_string("\x0c") == "\\f"); - CHECK(json::escape_string("\x0d") == "\\r"); - CHECK(json::escape_string("\x0e") == "\\u000e"); - CHECK(json::escape_string("\x0f") == "\\u000f"); - CHECK(json::escape_string("\x10") == "\\u0010"); - CHECK(json::escape_string("\x11") == "\\u0011"); - CHECK(json::escape_string("\x12") == "\\u0012"); - CHECK(json::escape_string("\x13") == "\\u0013"); - CHECK(json::escape_string("\x14") == "\\u0014"); - CHECK(json::escape_string("\x15") == "\\u0015"); - CHECK(json::escape_string("\x16") == "\\u0016"); - CHECK(json::escape_string("\x17") == "\\u0017"); - CHECK(json::escape_string("\x18") == "\\u0018"); - CHECK(json::escape_string("\x19") == "\\u0019"); - CHECK(json::escape_string("\x1a") == "\\u001a"); - CHECK(json::escape_string("\x1b") == "\\u001b"); - CHECK(json::escape_string("\x1c") == "\\u001c"); - CHECK(json::escape_string("\x1d") == "\\u001d"); - CHECK(json::escape_string("\x1e") == "\\u001e"); - CHECK(json::escape_string("\x1f") == "\\u001f"); + CHECK(escape_string("\"") == "\\\""); + CHECK(escape_string("\\") == "\\\\"); + CHECK(escape_string("\b") == "\\b"); + CHECK(escape_string("\f") == "\\f"); + CHECK(escape_string("\n") == "\\n"); + CHECK(escape_string("\r") == "\\r"); + CHECK(escape_string("\t") == "\\t"); + + CHECK(escape_string("\x01") == "\\u0001"); + CHECK(escape_string("\x02") == "\\u0002"); + CHECK(escape_string("\x03") == "\\u0003"); + CHECK(escape_string("\x04") == "\\u0004"); + CHECK(escape_string("\x05") == "\\u0005"); + CHECK(escape_string("\x06") == "\\u0006"); + CHECK(escape_string("\x07") == "\\u0007"); + CHECK(escape_string("\x08") == "\\b"); + CHECK(escape_string("\x09") == "\\t"); + CHECK(escape_string("\x0a") == "\\n"); + CHECK(escape_string("\x0b") == "\\u000b"); + CHECK(escape_string("\x0c") == "\\f"); + CHECK(escape_string("\x0d") == "\\r"); + CHECK(escape_string("\x0e") == "\\u000e"); + CHECK(escape_string("\x0f") == "\\u000f"); + CHECK(escape_string("\x10") == "\\u0010"); + CHECK(escape_string("\x11") == "\\u0011"); + CHECK(escape_string("\x12") == "\\u0012"); + CHECK(escape_string("\x13") == "\\u0013"); + CHECK(escape_string("\x14") == "\\u0014"); + CHECK(escape_string("\x15") == "\\u0015"); + CHECK(escape_string("\x16") == "\\u0016"); + CHECK(escape_string("\x17") == "\\u0017"); + CHECK(escape_string("\x18") == "\\u0018"); + CHECK(escape_string("\x19") == "\\u0019"); + CHECK(escape_string("\x1a") == "\\u001a"); + CHECK(escape_string("\x1b") == "\\u001b"); + CHECK(escape_string("\x1c") == "\\u001c"); + CHECK(escape_string("\x1d") == "\\u001d"); + CHECK(escape_string("\x1e") == "\\u001e"); + CHECK(escape_string("\x1f") == "\\u001f"); } }