From 899bd94b43a223baaaddca3caec8feb4dc3861b1 Mon Sep 17 00:00:00 2001 From: scinart Date: Fri, 18 Jan 2019 20:27:49 -0500 Subject: [PATCH] flush buffer in serializer::dump_escaped case UTF8_REJECT serializer use fixed buffer. Whenever it is nearly full, it is flushed to `output_adapter_t o` But the code forgets to flush when there is a invalid utf8 code point So there will be buffer overflow. --- include/nlohmann/detail/output/serializer.hpp | 10 +++ single_include/nlohmann/json.hpp | 10 +++ test/Makefile | 1 + test/src/unit-invalid_utf8.cpp | 68 +++++++++++++++++++ 4 files changed, 89 insertions(+) create mode 100644 test/src/unit-invalid_utf8.cpp diff --git a/include/nlohmann/detail/output/serializer.hpp b/include/nlohmann/detail/output/serializer.hpp index 4f7e1ede..b7a0e70c 100644 --- a/include/nlohmann/detail/output/serializer.hpp +++ b/include/nlohmann/detail/output/serializer.hpp @@ -454,6 +454,16 @@ class serializer string_buffer[bytes++] = detail::binary_writer::to_char_type('\xBF'); string_buffer[bytes++] = detail::binary_writer::to_char_type('\xBD'); } + + // write buffer and reset index; there must be 13 bytes + // left, as this is the maximal number of bytes to be + // written ("\uxxxx\uxxxx\0") for one code point + if (string_buffer.size() - bytes < 13) + { + o->write_characters(string_buffer.data(), bytes); + bytes = 0; + } + bytes_after_last_accept = bytes; } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index ef4b0cd0..308969f3 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -11344,6 +11344,16 @@ class serializer string_buffer[bytes++] = detail::binary_writer::to_char_type('\xBF'); string_buffer[bytes++] = detail::binary_writer::to_char_type('\xBD'); } + + // write buffer and reset index; there must be 13 bytes + // left, as this is the maximal number of bytes to be + // written ("\uxxxx\uxxxx\0") for one code point + if (string_buffer.size() - bytes < 13) + { + o->write_characters(string_buffer.data(), bytes); + bytes = 0; + } + bytes_after_last_accept = bytes; } diff --git a/test/Makefile b/test/Makefile index 4f00cbc7..e4fc39dc 100644 --- a/test/Makefile +++ b/test/Makefile @@ -30,6 +30,7 @@ SOURCES = src/unit.cpp \ src/unit-items.cpp \ src/unit-iterators1.cpp \ src/unit-iterators2.cpp \ + src/unit-invalid-utf8.cpp \ src/unit-merge_patch.cpp \ src/unit-json_patch.cpp \ src/unit-json_pointer.cpp \ diff --git a/test/src/unit-invalid_utf8.cpp b/test/src/unit-invalid_utf8.cpp new file mode 100644 index 00000000..36f1ada0 --- /dev/null +++ b/test/src/unit-invalid_utf8.cpp @@ -0,0 +1,68 @@ +/* + __ _____ _____ _____ + __| | __| | | | JSON for Modern C++ (test suite) +| | |__ | | | | | | version 3.5.0 +|_____|_____|_____|_|___| https://github.com/nlohmann/json + +Licensed under the MIT License . +SPDX-License-Identifier: MIT +Copyright (c) 2013-2018 Niels Lohmann . + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include "catch.hpp" +#include +using nlohmann::json; + +#include +#include + +TEST_CASE("INVALID-UTF8") +{ + SECTION("a bunch of -1, ensure_ascii=true") + { + json dump_test; + std::vector data(300, -1); + std::vector vec_string(300, "\\ufffd"); + std::string s{data.data(), data.size()}; + dump_test["1"] = s; + std::ostringstream os; + os << "{\"1\":\""; + std::copy( vec_string.begin(), vec_string.end(), std::ostream_iterator(os)); + os << "\"}"; + s = dump_test.dump(-1, ' ', true, nlohmann::json::error_handler_t::replace); + CHECK(s == os.str()); + } + SECTION("a bunch of -2, ensure_ascii=false") + { + json dump_test; + std::vector data(500, -2); + std::vector vec_string(500, "\xEF\xBF\xBD"); + std::string s{data.data(), data.size()}; + dump_test["1"] = s; + std::ostringstream os; + os << "{\"1\":\""; + std::copy( vec_string.begin(), vec_string.end(), std::ostream_iterator(os)); + os << "\"}"; + s = dump_test.dump(-1, ' ', false, nlohmann::json::error_handler_t::replace); + CHECK(s == os.str()); + } + +}