♻️ refactored binary readers to use a SAX parser

This commit is contained in:
Niels Lohmann 2018-03-11 18:47:38 +01:00
parent 149d2fd09c
commit ad47b0fbde
No known key found for this signature in database
GPG key ID: 7F3CEA63AE251B69
5 changed files with 592 additions and 493 deletions

View file

@ -16,6 +16,7 @@
#include <utility> // make_pair, move #include <utility> // make_pair, move
#include <nlohmann/detail/input/input_adapters.hpp> #include <nlohmann/detail/input/input_adapters.hpp>
#include <nlohmann/detail/input/json_sax.hpp>
#include <nlohmann/detail/exceptions.hpp> #include <nlohmann/detail/exceptions.hpp>
#include <nlohmann/detail/macro_scope.hpp> #include <nlohmann/detail/macro_scope.hpp>
#include <nlohmann/detail/value_t.hpp> #include <nlohmann/detail/value_t.hpp>
@ -37,6 +38,7 @@ class binary_reader
using number_integer_t = typename BasicJsonType::number_integer_t; using number_integer_t = typename BasicJsonType::number_integer_t;
using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
using string_t = typename BasicJsonType::string_t; using string_t = typename BasicJsonType::string_t;
using json_sax_t = json_sax<BasicJsonType>;
public: public:
/*! /*!
@ -61,13 +63,16 @@ class binary_reader
*/ */
BasicJsonType parse_cbor(const bool strict) BasicJsonType parse_cbor(const bool strict)
{ {
const auto res = parse_cbor_internal(); BasicJsonType result;
json_sax_dom_parser<BasicJsonType> sdp(result);
parse_cbor_sax_internal(&sdp);
result.assert_invariant();
if (strict) if (strict)
{ {
get(); get();
expect_eof(); expect_eof();
} }
return res; return result;
} }
/*! /*!
@ -82,13 +87,16 @@ class binary_reader
*/ */
BasicJsonType parse_msgpack(const bool strict) BasicJsonType parse_msgpack(const bool strict)
{ {
const auto res = parse_msgpack_internal(); BasicJsonType result;
json_sax_dom_parser<BasicJsonType> sdp(result);
parse_msgpack_sax_internal(&sdp);
result.assert_invariant();
if (strict) if (strict)
{ {
get(); get();
expect_eof(); expect_eof();
} }
return res; return result;
} }
/*! /*!
@ -103,13 +111,16 @@ class binary_reader
*/ */
BasicJsonType parse_ubjson(const bool strict) BasicJsonType parse_ubjson(const bool strict)
{ {
const auto res = parse_ubjson_internal(); BasicJsonType result;
json_sax_dom_parser<BasicJsonType> sdp(result);
parse_ubjson_sax_internal(&sdp);
result.assert_invariant();
if (strict) if (strict)
{ {
get_ignore_noop(); get_ignore_noop();
expect_eof(); expect_eof();
} }
return res; return result;
} }
/*! /*!
@ -130,13 +141,14 @@ class binary_reader
input (true, default) or whether the last read input (true, default) or whether the last read
character should be considered instead character should be considered instead
*/ */
BasicJsonType parse_cbor_internal(const bool get_char = true) bool parse_cbor_sax_internal(json_sax_t* sax, const bool get_char = true)
{ {
switch (get_char ? get() : current) switch (get_char ? get() : current)
{ {
// EOF // EOF
case std::char_traits<char>::eof(): case std::char_traits<char>::eof():
JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); unexpect_eof();
return sax->parse_error(chars_read, "<end of file>", "unexpected end of input");
// Integer 0x00..0x17 (0..23) // Integer 0x00..0x17 (0..23)
case 0x00: case 0x00:
@ -163,19 +175,19 @@ class binary_reader
case 0x15: case 0x15:
case 0x16: case 0x16:
case 0x17: case 0x17:
return static_cast<number_unsigned_t>(current); return sax->number_unsigned(static_cast<number_unsigned_t>(current));
case 0x18: // Unsigned integer (one-byte uint8_t follows) case 0x18: // Unsigned integer (one-byte uint8_t follows)
return get_number<uint8_t>(); return sax->number_unsigned(get_number<uint8_t>());
case 0x19: // Unsigned integer (two-byte uint16_t follows) case 0x19: // Unsigned integer (two-byte uint16_t follows)
return get_number<uint16_t>(); return sax->number_unsigned(get_number<uint16_t>());
case 0x1A: // Unsigned integer (four-byte uint32_t follows) case 0x1A: // Unsigned integer (four-byte uint32_t follows)
return get_number<uint32_t>(); return sax->number_unsigned(get_number<uint32_t>());
case 0x1B: // Unsigned integer (eight-byte uint64_t follows) case 0x1B: // Unsigned integer (eight-byte uint64_t follows)
return get_number<uint64_t>(); return sax->number_unsigned(get_number<uint64_t>());
// Negative integer -1-0x00..-1-0x17 (-1..-24) // Negative integer -1-0x00..-1-0x17 (-1..-24)
case 0x20: case 0x20:
@ -202,28 +214,20 @@ class binary_reader
case 0x35: case 0x35:
case 0x36: case 0x36:
case 0x37: case 0x37:
return static_cast<int8_t>(0x20 - 1 - current); return sax->number_integer(static_cast<int8_t>(0x20 - 1 - current));
case 0x38: // Negative integer (one-byte uint8_t follows) case 0x38: // Negative integer (one-byte uint8_t follows)
{ return sax->number_integer(static_cast<number_integer_t>(-1) - get_number<uint8_t>());
return static_cast<number_integer_t>(-1) - get_number<uint8_t>();
}
case 0x39: // Negative integer -1-n (two-byte uint16_t follows) case 0x39: // Negative integer -1-n (two-byte uint16_t follows)
{ return sax->number_integer(static_cast<number_integer_t>(-1) - get_number<uint16_t>());
return static_cast<number_integer_t>(-1) - get_number<uint16_t>();
}
case 0x3A: // Negative integer -1-n (four-byte uint32_t follows) case 0x3A: // Negative integer -1-n (four-byte uint32_t follows)
{ return sax->number_integer(static_cast<number_integer_t>(-1) - get_number<uint32_t>());
return static_cast<number_integer_t>(-1) - get_number<uint32_t>();
}
case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows) case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows)
{ return sax->number_integer(static_cast<number_integer_t>(-1) -
return static_cast<number_integer_t>(-1) - static_cast<number_integer_t>(get_number<uint64_t>()));
static_cast<number_integer_t>(get_number<uint64_t>());
}
// UTF-8 string (0x00..0x17 bytes follow) // UTF-8 string (0x00..0x17 bytes follow)
case 0x60: case 0x60:
@ -255,9 +259,7 @@ class binary_reader
case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
case 0x7F: // UTF-8 string (indefinite length) case 0x7F: // UTF-8 string (indefinite length)
{ return sax->string(get_cbor_string());
return get_cbor_string();
}
// array (0x00..0x17 data items follow) // array (0x00..0x17 data items follow)
case 0x80: case 0x80:
@ -284,39 +286,22 @@ class binary_reader
case 0x95: case 0x95:
case 0x96: case 0x96:
case 0x97: case 0x97:
{ return get_cbor_array_sax(sax, static_cast<std::size_t>(current & 0x1F));
return get_cbor_array(current & 0x1F);
}
case 0x98: // array (one-byte uint8_t for n follows) case 0x98: // array (one-byte uint8_t for n follows)
{ return get_cbor_array_sax(sax, static_cast<std::size_t>(get_number<uint8_t>()));
return get_cbor_array(get_number<uint8_t>());
}
case 0x99: // array (two-byte uint16_t for n follow) case 0x99: // array (two-byte uint16_t for n follow)
{ return get_cbor_array_sax(sax, static_cast<std::size_t>(get_number<uint16_t>()));
return get_cbor_array(get_number<uint16_t>());
}
case 0x9A: // array (four-byte uint32_t for n follow) case 0x9A: // array (four-byte uint32_t for n follow)
{ return get_cbor_array_sax(sax, static_cast<std::size_t>(get_number<uint32_t>()));
return get_cbor_array(get_number<uint32_t>());
}
case 0x9B: // array (eight-byte uint64_t for n follow) case 0x9B: // array (eight-byte uint64_t for n follow)
{ return get_cbor_array_sax(sax, static_cast<std::size_t>(get_number<uint64_t>()));
return get_cbor_array(get_number<uint64_t>());
}
case 0x9F: // array (indefinite length) case 0x9F: // array (indefinite length)
{ return get_cbor_array_sax(sax, json_sax_t::no_limit);
BasicJsonType result = value_t::array;
while (get() != 0xFF)
{
result.push_back(parse_cbor_internal(false));
}
return result;
}
// map (0x00..0x17 pairs of data items follow) // map (0x00..0x17 pairs of data items follow)
case 0xA0: case 0xA0:
@ -343,55 +328,31 @@ class binary_reader
case 0xB5: case 0xB5:
case 0xB6: case 0xB6:
case 0xB7: case 0xB7:
{ return get_cbor_object_sax(sax, static_cast<std::size_t>(current & 0x1F));
return get_cbor_object(current & 0x1F);
}
case 0xB8: // map (one-byte uint8_t for n follows) case 0xB8: // map (one-byte uint8_t for n follows)
{ return get_cbor_object_sax(sax, static_cast<std::size_t>(get_number<uint8_t>()));
return get_cbor_object(get_number<uint8_t>());
}
case 0xB9: // map (two-byte uint16_t for n follow) case 0xB9: // map (two-byte uint16_t for n follow)
{ return get_cbor_object_sax(sax, static_cast<std::size_t>(get_number<uint16_t>()));
return get_cbor_object(get_number<uint16_t>());
}
case 0xBA: // map (four-byte uint32_t for n follow) case 0xBA: // map (four-byte uint32_t for n follow)
{ return get_cbor_object_sax(sax, static_cast<std::size_t>(get_number<uint32_t>()));
return get_cbor_object(get_number<uint32_t>());
}
case 0xBB: // map (eight-byte uint64_t for n follow) case 0xBB: // map (eight-byte uint64_t for n follow)
{ return get_cbor_object_sax(sax, static_cast<std::size_t>(get_number<uint64_t>()));
return get_cbor_object(get_number<uint64_t>());
}
case 0xBF: // map (indefinite length) case 0xBF: // map (indefinite length)
{ return get_cbor_object_sax(sax, json_sax_t::no_limit);
BasicJsonType result = value_t::object;
while (get() != 0xFF)
{
auto key = get_cbor_string();
result[key] = parse_cbor_internal();
}
return result;
}
case 0xF4: // false case 0xF4: // false
{ return sax->boolean(false);
return false;
}
case 0xF5: // true case 0xF5: // true
{ return sax->boolean(true);
return true;
}
case 0xF6: // null case 0xF6: // null
{ return sax->null();
return value_t::null;
}
case 0xF9: // Half-Precision Float (two-byte IEEE 754) case 0xF9: // Half-Precision Float (two-byte IEEE 754)
{ {
@ -425,18 +386,14 @@ class binary_reader
val = (mant == 0) ? std::numeric_limits<double>::infinity() val = (mant == 0) ? std::numeric_limits<double>::infinity()
: std::numeric_limits<double>::quiet_NaN(); : std::numeric_limits<double>::quiet_NaN();
} }
return (half & 0x8000) != 0 ? -val : val; return sax->number_float((half & 0x8000) != 0 ? -val : val, "");
} }
case 0xFA: // Single-Precision Float (four-byte IEEE 754) case 0xFA: // Single-Precision Float (four-byte IEEE 754)
{ return sax->number_float(static_cast<double>(get_number<float>()), "");
return get_number<float>();
}
case 0xFB: // Double-Precision Float (eight-byte IEEE 754) case 0xFB: // Double-Precision Float (eight-byte IEEE 754)
{ return sax->number_float(get_number<double>(), "");
return get_number<double>();
}
default: // anything else (0xFF is handled inside the other types) default: // anything else (0xFF is handled inside the other types)
{ {
@ -447,13 +404,14 @@ class binary_reader
} }
} }
BasicJsonType parse_msgpack_internal() bool parse_msgpack_sax_internal(json_sax_t* sax)
{ {
switch (get()) switch (get())
{ {
// EOF // EOF
case std::char_traits<char>::eof(): case std::char_traits<char>::eof():
JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); unexpect_eof();
return sax->parse_error(chars_read, "<end of file>", "unexpected end of input");
// positive fixint // positive fixint
case 0x00: case 0x00:
@ -584,7 +542,7 @@ class binary_reader
case 0x7D: case 0x7D:
case 0x7E: case 0x7E:
case 0x7F: case 0x7F:
return static_cast<number_unsigned_t>(current); return sax->number_unsigned(static_cast<number_unsigned_t>(current));
// fixmap // fixmap
case 0x80: case 0x80:
@ -603,9 +561,7 @@ class binary_reader
case 0x8D: case 0x8D:
case 0x8E: case 0x8E:
case 0x8F: case 0x8F:
{ return get_msgpack_object_sax(sax, static_cast<std::size_t>(current & 0x0F));
return get_msgpack_object(current & 0x0F);
}
// fixarray // fixarray
case 0x90: case 0x90:
@ -624,9 +580,7 @@ class binary_reader
case 0x9D: case 0x9D:
case 0x9E: case 0x9E:
case 0x9F: case 0x9F:
{ return get_msgpack_array_sax(sax, static_cast<std::size_t>(current & 0x0F));
return get_msgpack_array(current & 0x0F);
}
// fixstr // fixstr
case 0xA0: case 0xA0:
@ -661,73 +615,65 @@ class binary_reader
case 0xBD: case 0xBD:
case 0xBE: case 0xBE:
case 0xBF: case 0xBF:
return get_msgpack_string(); return sax->string(get_msgpack_string());
case 0xC0: // nil case 0xC0: // nil
return value_t::null; return sax->null();
case 0xC2: // false case 0xC2: // false
return false; return sax->boolean(false);
case 0xC3: // true case 0xC3: // true
return true; return sax->boolean(true);
case 0xCA: // float 32 case 0xCA: // float 32
return get_number<float>(); return sax->number_float(static_cast<double>(get_number<float>()), "");
case 0xCB: // float 64 case 0xCB: // float 64
return get_number<double>(); return sax->number_float(get_number<double>(), "");
case 0xCC: // uint 8 case 0xCC: // uint 8
return get_number<uint8_t>(); return sax->number_unsigned(get_number<uint8_t>());
case 0xCD: // uint 16 case 0xCD: // uint 16
return get_number<uint16_t>(); return sax->number_unsigned(get_number<uint16_t>());
case 0xCE: // uint 32 case 0xCE: // uint 32
return get_number<uint32_t>(); return sax->number_unsigned(get_number<uint32_t>());
case 0xCF: // uint 64 case 0xCF: // uint 64
return get_number<uint64_t>(); return sax->number_unsigned(get_number<uint64_t>());
case 0xD0: // int 8 case 0xD0: // int 8
return get_number<int8_t>(); return sax->number_integer(get_number<int8_t>());
case 0xD1: // int 16 case 0xD1: // int 16
return get_number<int16_t>(); return sax->number_integer(get_number<int16_t>());
case 0xD2: // int 32 case 0xD2: // int 32
return get_number<int32_t>(); return sax->number_integer(get_number<int32_t>());
case 0xD3: // int 64 case 0xD3: // int 64
return get_number<int64_t>(); return sax->number_integer(get_number<int64_t>());
case 0xD9: // str 8 case 0xD9: // str 8
case 0xDA: // str 16 case 0xDA: // str 16
case 0xDB: // str 32 case 0xDB: // str 32
return get_msgpack_string(); return sax->string(get_msgpack_string());
case 0xDC: // array 16 case 0xDC: // array 16
{ return get_msgpack_array_sax(sax, static_cast<std::size_t>(get_number<uint16_t>()));
return get_msgpack_array(get_number<uint16_t>());
}
case 0xDD: // array 32 case 0xDD: // array 32
{ return get_msgpack_array_sax(sax, static_cast<std::size_t>(get_number<uint32_t>()));
return get_msgpack_array(get_number<uint32_t>());
}
case 0xDE: // map 16 case 0xDE: // map 16
{ return get_msgpack_object_sax(sax, static_cast<std::size_t>(get_number<uint16_t>()));
return get_msgpack_object(get_number<uint16_t>());
}
case 0xDF: // map 32 case 0xDF: // map 32
{ return get_msgpack_object_sax(sax, static_cast<std::size_t>(get_number<uint32_t>()));
return get_msgpack_object(get_number<uint32_t>());
}
// positive fixint // negative fixint
case 0xE0: case 0xE0:
case 0xE1: case 0xE1:
case 0xE2: case 0xE2:
@ -760,7 +706,7 @@ class binary_reader
case 0xFD: case 0xFD:
case 0xFE: case 0xFE:
case 0xFF: case 0xFF:
return static_cast<int8_t>(current); return sax->number_integer(static_cast<int8_t>(current));
default: // anything else default: // anything else
{ {
@ -777,9 +723,9 @@ class binary_reader
input (true, default) or whether the last read input (true, default) or whether the last read
character should be considered instead character should be considered instead
*/ */
BasicJsonType parse_ubjson_internal(const bool get_char = true) bool parse_ubjson_sax_internal(json_sax_t* sax, const bool get_char = true)
{ {
return get_ubjson_value(get_char ? get_ignore_noop() : current); return get_ubjson_sax_value(sax, get_char ? get_ignore_noop() : current);
} }
/*! /*!
@ -919,29 +865,19 @@ class binary_reader
case 0x75: case 0x75:
case 0x76: case 0x76:
case 0x77: case 0x77:
{
return get_string(current & 0x1F); return get_string(current & 0x1F);
}
case 0x78: // UTF-8 string (one-byte uint8_t for n follows) case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
{
return get_string(get_number<uint8_t>()); return get_string(get_number<uint8_t>());
}
case 0x79: // UTF-8 string (two-byte uint16_t for n follow) case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
{
return get_string(get_number<uint16_t>()); return get_string(get_number<uint16_t>());
}
case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
{
return get_string(get_number<uint32_t>()); return get_string(get_number<uint32_t>());
}
case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
{
return get_string(get_number<uint64_t>()); return get_string(get_number<uint64_t>());
}
case 0x7F: // UTF-8 string (indefinite length) case 0x7F: // UTF-8 string (indefinite length)
{ {
@ -962,31 +898,75 @@ class binary_reader
} }
} }
template<typename NumberType> bool get_cbor_array_sax(json_sax_t* sax, const std::size_t len)
BasicJsonType get_cbor_array(const NumberType len)
{ {
BasicJsonType result = value_t::array; if (not sax->start_array(len))
std::generate_n(std::back_inserter(*result.m_value.array), len, [this]()
{ {
return parse_cbor_internal(); return false;
}); }
return result;
if (len != json_sax_t::no_limit)
for (std::size_t i = 0; i < len; ++i)
{
if (not parse_cbor_sax_internal(sax))
{
return false;
}
}
else
{
while (get() != 0xFF)
{
if (not parse_cbor_sax_internal(sax, false))
{
return false;
}
}
}
return sax->end_array();
} }
template<typename NumberType> bool get_cbor_object_sax(json_sax_t* sax, const std::size_t len)
BasicJsonType get_cbor_object(const NumberType len)
{ {
BasicJsonType result = value_t::object; if (not sax->start_object(len))
std::generate_n(std::inserter(*result.m_value.object,
result.m_value.object->end()),
len, [this]()
{ {
get(); return false;
auto key = get_cbor_string(); }
auto val = parse_cbor_internal();
return std::make_pair(std::move(key), std::move(val)); if (len != json_sax_t::no_limit)
}); {
return result; for (std::size_t i = 0; i < len; ++i)
{
get();
if (not sax->key(get_cbor_string()))
{
return false;
}
if (not parse_cbor_sax_internal(sax))
{
return false;
}
}
}
else
{
while (get() != 0xFF)
{
if (not sax->key(get_cbor_string()))
{
return false;
}
if (not parse_cbor_sax_internal(sax))
{
return false;
}
}
}
return sax->end_object();
} }
/*! /*!
@ -1039,24 +1019,16 @@ class binary_reader
case 0xBD: case 0xBD:
case 0xBE: case 0xBE:
case 0xBF: case 0xBF:
{
return get_string(current & 0x1F); return get_string(current & 0x1F);
}
case 0xD9: // str 8 case 0xD9: // str 8
{
return get_string(get_number<uint8_t>()); return get_string(get_number<uint8_t>());
}
case 0xDA: // str 16 case 0xDA: // str 16
{
return get_string(get_number<uint16_t>()); return get_string(get_number<uint16_t>());
}
case 0xDB: // str 32 case 0xDB: // str 32
{
return get_string(get_number<uint32_t>()); return get_string(get_number<uint32_t>());
}
default: default:
{ {
@ -1068,31 +1040,46 @@ class binary_reader
} }
} }
template<typename NumberType> bool get_msgpack_array_sax(json_sax_t* sax, const std::size_t len)
BasicJsonType get_msgpack_array(const NumberType len)
{ {
BasicJsonType result = value_t::array; if (not sax->start_array(len))
std::generate_n(std::back_inserter(*result.m_value.array), len, [this]()
{ {
return parse_msgpack_internal(); return false;
}); }
return result;
for (std::size_t i = 0; i < len; ++i)
{
if (not parse_msgpack_sax_internal(sax))
{
return false;
}
}
return sax->end_array();
} }
template<typename NumberType> bool get_msgpack_object_sax(json_sax_t* sax, const std::size_t len)
BasicJsonType get_msgpack_object(const NumberType len)
{ {
BasicJsonType result = value_t::object; if (not sax->start_object(len))
std::generate_n(std::inserter(*result.m_value.object, {
result.m_value.object->end()), return false;
len, [this]() }
for (std::size_t i = 0; i < len; ++i)
{ {
get(); get();
auto key = get_msgpack_string(); if (not sax->key(get_msgpack_string()))
auto val = parse_msgpack_internal(); {
return std::make_pair(std::move(key), std::move(val)); return false;
}); }
return result;
if (not parse_msgpack_sax_internal(sax))
{
return false;
}
}
return sax->end_object();
} }
/*! /*!
@ -1140,6 +1127,25 @@ class binary_reader
} }
} }
std::size_t get_ubjson_size_value()
{
switch (get_ignore_noop())
{
case 'U':
return static_cast<std::size_t>(get_number<uint8_t>());
case 'i':
return static_cast<std::size_t>(get_number<int8_t>());
case 'I':
return static_cast<std::size_t>(get_number<int16_t>());
case 'l':
return static_cast<std::size_t>(get_number<int32_t>());
case 'L':
return static_cast<std::size_t>(get_number<int64_t>());
default:
return std::size_t(-1);
}
}
/*! /*!
@brief determine the type and size for a container @brief determine the type and size for a container
@ -1168,45 +1174,46 @@ class binary_reader
JSON_THROW(parse_error::create(112, chars_read, JSON_THROW(parse_error::create(112, chars_read,
"expected '#' after UBJSON type information; last byte: 0x" + ss.str())); "expected '#' after UBJSON type information; last byte: 0x" + ss.str()));
} }
sz = parse_ubjson_internal(); sz = get_ubjson_size_value();
} }
else if (current == '#') else if (current == '#')
{ {
sz = parse_ubjson_internal(); sz = get_ubjson_size_value();
} }
return std::make_pair(sz, tc); return std::make_pair(sz, tc);
} }
BasicJsonType get_ubjson_value(const int prefix) bool get_ubjson_sax_value(json_sax_t* sax, const int prefix)
{ {
switch (prefix) switch (prefix)
{ {
case std::char_traits<char>::eof(): // EOF case std::char_traits<char>::eof(): // EOF
JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); unexpect_eof();
return sax->parse_error(chars_read, "<end of file>", "unexpected end of input");
case 'T': // true case 'T': // true
return true; return sax->boolean(true);
case 'F': // false case 'F': // false
return false; return sax->boolean(false);
case 'Z': // null case 'Z': // null
return nullptr; return sax->null();
case 'U': case 'U':
return get_number<uint8_t>(); return sax->number_unsigned(get_number<uint8_t>());
case 'i': case 'i':
return get_number<int8_t>(); return sax->number_integer(get_number<int8_t>());
case 'I': case 'I':
return get_number<int16_t>(); return sax->number_integer(get_number<int16_t>());
case 'l': case 'l':
return get_number<int32_t>(); return sax->number_integer(get_number<int32_t>());
case 'L': case 'L':
return get_number<int64_t>(); return sax->number_integer(get_number<int64_t>());
case 'd': case 'd':
return get_number<float>(); return sax->number_float(static_cast<double>(get_number<float>()), "");
case 'D': case 'D':
return get_number<double>(); return sax->number_float(get_number<double>(), "");
case 'C': // char case 'C': // char
{ {
@ -1219,17 +1226,17 @@ class binary_reader
JSON_THROW(parse_error::create(113, chars_read, JSON_THROW(parse_error::create(113, chars_read,
"byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + ss.str())); "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + ss.str()));
} }
return string_t(1, static_cast<char>(current)); return sax->string(string_t(1, static_cast<char>(current)));
} }
case 'S': // string case 'S': // string
return get_ubjson_string(); return sax->string(get_ubjson_string());
case '[': // array case '[': // array
return get_ubjson_array(); return get_ubjson_sax_array(sax);
case '{': // object case '{': // object
return get_ubjson_object(); return get_ubjson_sax_object(sax);
default: // anything else default: // anything else
std::stringstream ss; std::stringstream ss;
@ -1239,98 +1246,123 @@ class binary_reader
} }
} }
BasicJsonType get_ubjson_array() bool get_ubjson_sax_array(json_sax_t* sax)
{ {
BasicJsonType result = value_t::array;
const auto size_and_type = get_ubjson_size_type(); const auto size_and_type = get_ubjson_size_type();
if (size_and_type.first != string_t::npos) if (size_and_type.first != string_t::npos)
{ {
if (JSON_UNLIKELY(size_and_type.first > result.max_size())) if (not sax->start_array(size_and_type.first))
{ {
JSON_THROW(out_of_range::create(408, return false;
"excessive array size: " + std::to_string(size_and_type.first)));
} }
if (size_and_type.second != 0) if (size_and_type.second != 0)
{ {
if (size_and_type.second != 'N') if (size_and_type.second != 'N')
{ {
std::generate_n(std::back_inserter(*result.m_value.array), for (std::size_t i = 0; i < size_and_type.first; ++i)
size_and_type.first, [this, size_and_type]()
{ {
return get_ubjson_value(size_and_type.second); if (not get_ubjson_sax_value(sax, size_and_type.second))
}); {
return false;
}
}
} }
} }
else else
{ {
std::generate_n(std::back_inserter(*result.m_value.array), for (std::size_t i = 0; i < size_and_type.first; ++i)
size_and_type.first, [this]()
{ {
return parse_ubjson_internal(); if (not parse_ubjson_sax_internal(sax))
}); {
return false;
}
}
} }
} }
else else
{ {
if (not sax->start_array())
{
return false;
}
while (current != ']') while (current != ']')
{ {
result.push_back(parse_ubjson_internal(false)); if (not parse_ubjson_sax_internal(sax, false))
{
return false;
}
get_ignore_noop(); get_ignore_noop();
} }
} }
return result; return sax->end_array();
} }
BasicJsonType get_ubjson_object() bool get_ubjson_sax_object(json_sax_t* sax)
{ {
BasicJsonType result = value_t::object;
const auto size_and_type = get_ubjson_size_type(); const auto size_and_type = get_ubjson_size_type();
if (size_and_type.first != string_t::npos) if (size_and_type.first != string_t::npos)
{ {
if (JSON_UNLIKELY(size_and_type.first > result.max_size())) if (not sax->start_object(size_and_type.first))
{ {
JSON_THROW(out_of_range::create(408, return false;
"excessive object size: " + std::to_string(size_and_type.first)));
} }
if (size_and_type.second != 0) if (size_and_type.second != 0)
{ {
std::generate_n(std::inserter(*result.m_value.object, for (std::size_t i = 0; i < size_and_type.first; ++i)
result.m_value.object->end()),
size_and_type.first, [this, size_and_type]()
{ {
auto key = get_ubjson_string(); if (not sax->key(get_ubjson_string()))
auto val = get_ubjson_value(size_and_type.second); {
return std::make_pair(std::move(key), std::move(val)); return false;
}); }
if (not get_ubjson_sax_value(sax, size_and_type.second))
{
return false;
}
}
} }
else else
{ {
std::generate_n(std::inserter(*result.m_value.object, for (std::size_t i = 0; i < size_and_type.first; ++i)
result.m_value.object->end()),
size_and_type.first, [this]()
{ {
auto key = get_ubjson_string(); if (not sax->key(get_ubjson_string()))
auto val = parse_ubjson_internal(); {
return std::make_pair(std::move(key), std::move(val)); return false;
}); }
if (not parse_ubjson_sax_internal(sax))
{
return false;
}
}
} }
} }
else else
{ {
if (not sax->start_object())
{
return false;
}
while (current != '}') while (current != '}')
{ {
auto key = get_ubjson_string(false); if (not sax->key(get_ubjson_string(false)))
result[std::move(key)] = parse_ubjson_internal(); {
return false;
}
if (not parse_ubjson_sax_internal(sax))
{
return false;
}
get_ignore_noop(); get_ignore_noop();
} }
} }
return result; return sax->end_object();
} }
/*! /*!

View file

@ -124,6 +124,8 @@ struct json_sax
}; };
namespace detail
{
template<typename BasicJsonType> template<typename BasicJsonType>
class json_sax_dom_parser : public json_sax<BasicJsonType> class json_sax_dom_parser : public json_sax<BasicJsonType>
{ {
@ -172,9 +174,16 @@ class json_sax_dom_parser : public json_sax<BasicJsonType>
return true; return true;
} }
bool start_object(std::size_t) override bool start_object(std::size_t len) override
{ {
ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); ref_stack.push_back(handle_value(BasicJsonType::value_t::object));
if (JSON_UNLIKELY(len != json_sax<BasicJsonType>::no_limit and len > ref_stack.back()->max_size()))
{
JSON_THROW(out_of_range::create(408,
"excessive object size: " + std::to_string(len)));
}
return true; return true;
} }
@ -191,9 +200,16 @@ class json_sax_dom_parser : public json_sax<BasicJsonType>
return true; return true;
} }
bool start_array(std::size_t) override bool start_array(std::size_t len) override
{ {
ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); ref_stack.push_back(handle_value(BasicJsonType::value_t::array));
if (JSON_UNLIKELY(len != json_sax<BasicJsonType>::no_limit and len > ref_stack.back()->max_size()))
{
JSON_THROW(out_of_range::create(408,
"excessive array size: " + std::to_string(len)));
}
return true; return true;
} }
@ -348,6 +364,7 @@ class json_sax_acceptor : public json_sax<BasicJsonType>
return false; return false;
} }
}; };
}
} }

View file

@ -172,7 +172,7 @@ class basic_json
template<typename BasicJsonType> template<typename BasicJsonType>
friend class ::nlohmann::detail::binary_reader; friend class ::nlohmann::detail::binary_reader;
template<typename BasicJsonType> template<typename BasicJsonType>
friend class ::nlohmann::json_sax_dom_parser; friend class ::nlohmann::detail::json_sax_dom_parser;
/// workaround type for MSVC /// workaround type for MSVC
using basic_json_t = NLOHMANN_BASIC_JSON_TPL; using basic_json_t = NLOHMANN_BASIC_JSON_TPL;

View file

@ -3258,6 +3258,8 @@ struct json_sax
}; };
namespace detail
{
template<typename BasicJsonType> template<typename BasicJsonType>
class json_sax_dom_parser : public json_sax<BasicJsonType> class json_sax_dom_parser : public json_sax<BasicJsonType>
{ {
@ -3306,9 +3308,16 @@ class json_sax_dom_parser : public json_sax<BasicJsonType>
return true; return true;
} }
bool start_object(std::size_t) override bool start_object(std::size_t len) override
{ {
ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); ref_stack.push_back(handle_value(BasicJsonType::value_t::object));
if (JSON_UNLIKELY(len != json_sax<BasicJsonType>::no_limit and len > ref_stack.back()->max_size()))
{
JSON_THROW(out_of_range::create(408,
"excessive object size: " + std::to_string(len)));
}
return true; return true;
} }
@ -3325,9 +3334,16 @@ class json_sax_dom_parser : public json_sax<BasicJsonType>
return true; return true;
} }
bool start_array(std::size_t) override bool start_array(std::size_t len) override
{ {
ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); ref_stack.push_back(handle_value(BasicJsonType::value_t::array));
if (JSON_UNLIKELY(len != json_sax<BasicJsonType>::no_limit and len > ref_stack.back()->max_size()))
{
JSON_THROW(out_of_range::create(408,
"excessive array size: " + std::to_string(len)));
}
return true; return true;
} }
@ -3482,6 +3498,7 @@ class json_sax_acceptor : public json_sax<BasicJsonType>
return false; return false;
} }
}; };
}
} }
@ -5285,6 +5302,8 @@ class output_adapter
// #include <nlohmann/detail/input/input_adapters.hpp> // #include <nlohmann/detail/input/input_adapters.hpp>
// #include <nlohmann/detail/input/json_sax.hpp>
// #include <nlohmann/detail/exceptions.hpp> // #include <nlohmann/detail/exceptions.hpp>
// #include <nlohmann/detail/macro_scope.hpp> // #include <nlohmann/detail/macro_scope.hpp>
@ -5309,6 +5328,7 @@ class binary_reader
using number_integer_t = typename BasicJsonType::number_integer_t; using number_integer_t = typename BasicJsonType::number_integer_t;
using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
using string_t = typename BasicJsonType::string_t; using string_t = typename BasicJsonType::string_t;
using json_sax_t = json_sax<BasicJsonType>;
public: public:
/*! /*!
@ -5333,13 +5353,16 @@ class binary_reader
*/ */
BasicJsonType parse_cbor(const bool strict) BasicJsonType parse_cbor(const bool strict)
{ {
const auto res = parse_cbor_internal(); BasicJsonType result;
json_sax_dom_parser<BasicJsonType> sdp(result);
parse_cbor_sax_internal(&sdp);
result.assert_invariant();
if (strict) if (strict)
{ {
get(); get();
expect_eof(); expect_eof();
} }
return res; return result;
} }
/*! /*!
@ -5354,13 +5377,16 @@ class binary_reader
*/ */
BasicJsonType parse_msgpack(const bool strict) BasicJsonType parse_msgpack(const bool strict)
{ {
const auto res = parse_msgpack_internal(); BasicJsonType result;
json_sax_dom_parser<BasicJsonType> sdp(result);
parse_msgpack_sax_internal(&sdp);
result.assert_invariant();
if (strict) if (strict)
{ {
get(); get();
expect_eof(); expect_eof();
} }
return res; return result;
} }
/*! /*!
@ -5375,13 +5401,16 @@ class binary_reader
*/ */
BasicJsonType parse_ubjson(const bool strict) BasicJsonType parse_ubjson(const bool strict)
{ {
const auto res = parse_ubjson_internal(); BasicJsonType result;
json_sax_dom_parser<BasicJsonType> sdp(result);
parse_ubjson_sax_internal(&sdp);
result.assert_invariant();
if (strict) if (strict)
{ {
get_ignore_noop(); get_ignore_noop();
expect_eof(); expect_eof();
} }
return res; return result;
} }
/*! /*!
@ -5402,13 +5431,14 @@ class binary_reader
input (true, default) or whether the last read input (true, default) or whether the last read
character should be considered instead character should be considered instead
*/ */
BasicJsonType parse_cbor_internal(const bool get_char = true) bool parse_cbor_sax_internal(json_sax_t* sax, const bool get_char = true)
{ {
switch (get_char ? get() : current) switch (get_char ? get() : current)
{ {
// EOF // EOF
case std::char_traits<char>::eof(): case std::char_traits<char>::eof():
JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); unexpect_eof();
return sax->parse_error(chars_read, "<end of file>", "unexpected end of input");
// Integer 0x00..0x17 (0..23) // Integer 0x00..0x17 (0..23)
case 0x00: case 0x00:
@ -5435,19 +5465,19 @@ class binary_reader
case 0x15: case 0x15:
case 0x16: case 0x16:
case 0x17: case 0x17:
return static_cast<number_unsigned_t>(current); return sax->number_unsigned(static_cast<number_unsigned_t>(current));
case 0x18: // Unsigned integer (one-byte uint8_t follows) case 0x18: // Unsigned integer (one-byte uint8_t follows)
return get_number<uint8_t>(); return sax->number_unsigned(get_number<uint8_t>());
case 0x19: // Unsigned integer (two-byte uint16_t follows) case 0x19: // Unsigned integer (two-byte uint16_t follows)
return get_number<uint16_t>(); return sax->number_unsigned(get_number<uint16_t>());
case 0x1A: // Unsigned integer (four-byte uint32_t follows) case 0x1A: // Unsigned integer (four-byte uint32_t follows)
return get_number<uint32_t>(); return sax->number_unsigned(get_number<uint32_t>());
case 0x1B: // Unsigned integer (eight-byte uint64_t follows) case 0x1B: // Unsigned integer (eight-byte uint64_t follows)
return get_number<uint64_t>(); return sax->number_unsigned(get_number<uint64_t>());
// Negative integer -1-0x00..-1-0x17 (-1..-24) // Negative integer -1-0x00..-1-0x17 (-1..-24)
case 0x20: case 0x20:
@ -5474,28 +5504,20 @@ class binary_reader
case 0x35: case 0x35:
case 0x36: case 0x36:
case 0x37: case 0x37:
return static_cast<int8_t>(0x20 - 1 - current); return sax->number_integer(static_cast<int8_t>(0x20 - 1 - current));
case 0x38: // Negative integer (one-byte uint8_t follows) case 0x38: // Negative integer (one-byte uint8_t follows)
{ return sax->number_integer(static_cast<number_integer_t>(-1) - get_number<uint8_t>());
return static_cast<number_integer_t>(-1) - get_number<uint8_t>();
}
case 0x39: // Negative integer -1-n (two-byte uint16_t follows) case 0x39: // Negative integer -1-n (two-byte uint16_t follows)
{ return sax->number_integer(static_cast<number_integer_t>(-1) - get_number<uint16_t>());
return static_cast<number_integer_t>(-1) - get_number<uint16_t>();
}
case 0x3A: // Negative integer -1-n (four-byte uint32_t follows) case 0x3A: // Negative integer -1-n (four-byte uint32_t follows)
{ return sax->number_integer(static_cast<number_integer_t>(-1) - get_number<uint32_t>());
return static_cast<number_integer_t>(-1) - get_number<uint32_t>();
}
case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows) case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows)
{ return sax->number_integer(static_cast<number_integer_t>(-1) -
return static_cast<number_integer_t>(-1) - static_cast<number_integer_t>(get_number<uint64_t>()));
static_cast<number_integer_t>(get_number<uint64_t>());
}
// UTF-8 string (0x00..0x17 bytes follow) // UTF-8 string (0x00..0x17 bytes follow)
case 0x60: case 0x60:
@ -5527,9 +5549,7 @@ class binary_reader
case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
case 0x7F: // UTF-8 string (indefinite length) case 0x7F: // UTF-8 string (indefinite length)
{ return sax->string(get_cbor_string());
return get_cbor_string();
}
// array (0x00..0x17 data items follow) // array (0x00..0x17 data items follow)
case 0x80: case 0x80:
@ -5556,39 +5576,22 @@ class binary_reader
case 0x95: case 0x95:
case 0x96: case 0x96:
case 0x97: case 0x97:
{ return get_cbor_array_sax(sax, static_cast<std::size_t>(current & 0x1F));
return get_cbor_array(current & 0x1F);
}
case 0x98: // array (one-byte uint8_t for n follows) case 0x98: // array (one-byte uint8_t for n follows)
{ return get_cbor_array_sax(sax, static_cast<std::size_t>(get_number<uint8_t>()));
return get_cbor_array(get_number<uint8_t>());
}
case 0x99: // array (two-byte uint16_t for n follow) case 0x99: // array (two-byte uint16_t for n follow)
{ return get_cbor_array_sax(sax, static_cast<std::size_t>(get_number<uint16_t>()));
return get_cbor_array(get_number<uint16_t>());
}
case 0x9A: // array (four-byte uint32_t for n follow) case 0x9A: // array (four-byte uint32_t for n follow)
{ return get_cbor_array_sax(sax, static_cast<std::size_t>(get_number<uint32_t>()));
return get_cbor_array(get_number<uint32_t>());
}
case 0x9B: // array (eight-byte uint64_t for n follow) case 0x9B: // array (eight-byte uint64_t for n follow)
{ return get_cbor_array_sax(sax, static_cast<std::size_t>(get_number<uint64_t>()));
return get_cbor_array(get_number<uint64_t>());
}
case 0x9F: // array (indefinite length) case 0x9F: // array (indefinite length)
{ return get_cbor_array_sax(sax, json_sax_t::no_limit);
BasicJsonType result = value_t::array;
while (get() != 0xFF)
{
result.push_back(parse_cbor_internal(false));
}
return result;
}
// map (0x00..0x17 pairs of data items follow) // map (0x00..0x17 pairs of data items follow)
case 0xA0: case 0xA0:
@ -5615,55 +5618,31 @@ class binary_reader
case 0xB5: case 0xB5:
case 0xB6: case 0xB6:
case 0xB7: case 0xB7:
{ return get_cbor_object_sax(sax, static_cast<std::size_t>(current & 0x1F));
return get_cbor_object(current & 0x1F);
}
case 0xB8: // map (one-byte uint8_t for n follows) case 0xB8: // map (one-byte uint8_t for n follows)
{ return get_cbor_object_sax(sax, static_cast<std::size_t>(get_number<uint8_t>()));
return get_cbor_object(get_number<uint8_t>());
}
case 0xB9: // map (two-byte uint16_t for n follow) case 0xB9: // map (two-byte uint16_t for n follow)
{ return get_cbor_object_sax(sax, static_cast<std::size_t>(get_number<uint16_t>()));
return get_cbor_object(get_number<uint16_t>());
}
case 0xBA: // map (four-byte uint32_t for n follow) case 0xBA: // map (four-byte uint32_t for n follow)
{ return get_cbor_object_sax(sax, static_cast<std::size_t>(get_number<uint32_t>()));
return get_cbor_object(get_number<uint32_t>());
}
case 0xBB: // map (eight-byte uint64_t for n follow) case 0xBB: // map (eight-byte uint64_t for n follow)
{ return get_cbor_object_sax(sax, static_cast<std::size_t>(get_number<uint64_t>()));
return get_cbor_object(get_number<uint64_t>());
}
case 0xBF: // map (indefinite length) case 0xBF: // map (indefinite length)
{ return get_cbor_object_sax(sax, json_sax_t::no_limit);
BasicJsonType result = value_t::object;
while (get() != 0xFF)
{
auto key = get_cbor_string();
result[key] = parse_cbor_internal();
}
return result;
}
case 0xF4: // false case 0xF4: // false
{ return sax->boolean(false);
return false;
}
case 0xF5: // true case 0xF5: // true
{ return sax->boolean(true);
return true;
}
case 0xF6: // null case 0xF6: // null
{ return sax->null();
return value_t::null;
}
case 0xF9: // Half-Precision Float (two-byte IEEE 754) case 0xF9: // Half-Precision Float (two-byte IEEE 754)
{ {
@ -5697,18 +5676,14 @@ class binary_reader
val = (mant == 0) ? std::numeric_limits<double>::infinity() val = (mant == 0) ? std::numeric_limits<double>::infinity()
: std::numeric_limits<double>::quiet_NaN(); : std::numeric_limits<double>::quiet_NaN();
} }
return (half & 0x8000) != 0 ? -val : val; return sax->number_float((half & 0x8000) != 0 ? -val : val, "");
} }
case 0xFA: // Single-Precision Float (four-byte IEEE 754) case 0xFA: // Single-Precision Float (four-byte IEEE 754)
{ return sax->number_float(static_cast<double>(get_number<float>()), "");
return get_number<float>();
}
case 0xFB: // Double-Precision Float (eight-byte IEEE 754) case 0xFB: // Double-Precision Float (eight-byte IEEE 754)
{ return sax->number_float(get_number<double>(), "");
return get_number<double>();
}
default: // anything else (0xFF is handled inside the other types) default: // anything else (0xFF is handled inside the other types)
{ {
@ -5719,13 +5694,14 @@ class binary_reader
} }
} }
BasicJsonType parse_msgpack_internal() bool parse_msgpack_sax_internal(json_sax_t* sax)
{ {
switch (get()) switch (get())
{ {
// EOF // EOF
case std::char_traits<char>::eof(): case std::char_traits<char>::eof():
JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); unexpect_eof();
return sax->parse_error(chars_read, "<end of file>", "unexpected end of input");
// positive fixint // positive fixint
case 0x00: case 0x00:
@ -5856,7 +5832,7 @@ class binary_reader
case 0x7D: case 0x7D:
case 0x7E: case 0x7E:
case 0x7F: case 0x7F:
return static_cast<number_unsigned_t>(current); return sax->number_unsigned(static_cast<number_unsigned_t>(current));
// fixmap // fixmap
case 0x80: case 0x80:
@ -5875,9 +5851,7 @@ class binary_reader
case 0x8D: case 0x8D:
case 0x8E: case 0x8E:
case 0x8F: case 0x8F:
{ return get_msgpack_object_sax(sax, static_cast<std::size_t>(current & 0x0F));
return get_msgpack_object(current & 0x0F);
}
// fixarray // fixarray
case 0x90: case 0x90:
@ -5896,9 +5870,7 @@ class binary_reader
case 0x9D: case 0x9D:
case 0x9E: case 0x9E:
case 0x9F: case 0x9F:
{ return get_msgpack_array_sax(sax, static_cast<std::size_t>(current & 0x0F));
return get_msgpack_array(current & 0x0F);
}
// fixstr // fixstr
case 0xA0: case 0xA0:
@ -5933,73 +5905,65 @@ class binary_reader
case 0xBD: case 0xBD:
case 0xBE: case 0xBE:
case 0xBF: case 0xBF:
return get_msgpack_string(); return sax->string(get_msgpack_string());
case 0xC0: // nil case 0xC0: // nil
return value_t::null; return sax->null();
case 0xC2: // false case 0xC2: // false
return false; return sax->boolean(false);
case 0xC3: // true case 0xC3: // true
return true; return sax->boolean(true);
case 0xCA: // float 32 case 0xCA: // float 32
return get_number<float>(); return sax->number_float(static_cast<double>(get_number<float>()), "");
case 0xCB: // float 64 case 0xCB: // float 64
return get_number<double>(); return sax->number_float(get_number<double>(), "");
case 0xCC: // uint 8 case 0xCC: // uint 8
return get_number<uint8_t>(); return sax->number_unsigned(get_number<uint8_t>());
case 0xCD: // uint 16 case 0xCD: // uint 16
return get_number<uint16_t>(); return sax->number_unsigned(get_number<uint16_t>());
case 0xCE: // uint 32 case 0xCE: // uint 32
return get_number<uint32_t>(); return sax->number_unsigned(get_number<uint32_t>());
case 0xCF: // uint 64 case 0xCF: // uint 64
return get_number<uint64_t>(); return sax->number_unsigned(get_number<uint64_t>());
case 0xD0: // int 8 case 0xD0: // int 8
return get_number<int8_t>(); return sax->number_integer(get_number<int8_t>());
case 0xD1: // int 16 case 0xD1: // int 16
return get_number<int16_t>(); return sax->number_integer(get_number<int16_t>());
case 0xD2: // int 32 case 0xD2: // int 32
return get_number<int32_t>(); return sax->number_integer(get_number<int32_t>());
case 0xD3: // int 64 case 0xD3: // int 64
return get_number<int64_t>(); return sax->number_integer(get_number<int64_t>());
case 0xD9: // str 8 case 0xD9: // str 8
case 0xDA: // str 16 case 0xDA: // str 16
case 0xDB: // str 32 case 0xDB: // str 32
return get_msgpack_string(); return sax->string(get_msgpack_string());
case 0xDC: // array 16 case 0xDC: // array 16
{ return get_msgpack_array_sax(sax, static_cast<std::size_t>(get_number<uint16_t>()));
return get_msgpack_array(get_number<uint16_t>());
}
case 0xDD: // array 32 case 0xDD: // array 32
{ return get_msgpack_array_sax(sax, static_cast<std::size_t>(get_number<uint32_t>()));
return get_msgpack_array(get_number<uint32_t>());
}
case 0xDE: // map 16 case 0xDE: // map 16
{ return get_msgpack_object_sax(sax, static_cast<std::size_t>(get_number<uint16_t>()));
return get_msgpack_object(get_number<uint16_t>());
}
case 0xDF: // map 32 case 0xDF: // map 32
{ return get_msgpack_object_sax(sax, static_cast<std::size_t>(get_number<uint32_t>()));
return get_msgpack_object(get_number<uint32_t>());
}
// positive fixint // negative fixint
case 0xE0: case 0xE0:
case 0xE1: case 0xE1:
case 0xE2: case 0xE2:
@ -6032,7 +5996,7 @@ class binary_reader
case 0xFD: case 0xFD:
case 0xFE: case 0xFE:
case 0xFF: case 0xFF:
return static_cast<int8_t>(current); return sax->number_integer(static_cast<int8_t>(current));
default: // anything else default: // anything else
{ {
@ -6049,9 +6013,9 @@ class binary_reader
input (true, default) or whether the last read input (true, default) or whether the last read
character should be considered instead character should be considered instead
*/ */
BasicJsonType parse_ubjson_internal(const bool get_char = true) bool parse_ubjson_sax_internal(json_sax_t* sax, const bool get_char = true)
{ {
return get_ubjson_value(get_char ? get_ignore_noop() : current); return get_ubjson_sax_value(sax, get_char ? get_ignore_noop() : current);
} }
/*! /*!
@ -6191,29 +6155,19 @@ class binary_reader
case 0x75: case 0x75:
case 0x76: case 0x76:
case 0x77: case 0x77:
{
return get_string(current & 0x1F); return get_string(current & 0x1F);
}
case 0x78: // UTF-8 string (one-byte uint8_t for n follows) case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
{
return get_string(get_number<uint8_t>()); return get_string(get_number<uint8_t>());
}
case 0x79: // UTF-8 string (two-byte uint16_t for n follow) case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
{
return get_string(get_number<uint16_t>()); return get_string(get_number<uint16_t>());
}
case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
{
return get_string(get_number<uint32_t>()); return get_string(get_number<uint32_t>());
}
case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
{
return get_string(get_number<uint64_t>()); return get_string(get_number<uint64_t>());
}
case 0x7F: // UTF-8 string (indefinite length) case 0x7F: // UTF-8 string (indefinite length)
{ {
@ -6234,31 +6188,75 @@ class binary_reader
} }
} }
template<typename NumberType> bool get_cbor_array_sax(json_sax_t* sax, const std::size_t len)
BasicJsonType get_cbor_array(const NumberType len)
{ {
BasicJsonType result = value_t::array; if (not sax->start_array(len))
std::generate_n(std::back_inserter(*result.m_value.array), len, [this]()
{ {
return parse_cbor_internal(); return false;
}); }
return result;
if (len != json_sax_t::no_limit)
for (std::size_t i = 0; i < len; ++i)
{
if (not parse_cbor_sax_internal(sax))
{
return false;
}
}
else
{
while (get() != 0xFF)
{
if (not parse_cbor_sax_internal(sax, false))
{
return false;
}
}
}
return sax->end_array();
} }
template<typename NumberType> bool get_cbor_object_sax(json_sax_t* sax, const std::size_t len)
BasicJsonType get_cbor_object(const NumberType len)
{ {
BasicJsonType result = value_t::object; if (not sax->start_object(len))
std::generate_n(std::inserter(*result.m_value.object,
result.m_value.object->end()),
len, [this]()
{ {
get(); return false;
auto key = get_cbor_string(); }
auto val = parse_cbor_internal();
return std::make_pair(std::move(key), std::move(val)); if (len != json_sax_t::no_limit)
}); {
return result; for (std::size_t i = 0; i < len; ++i)
{
get();
if (not sax->key(get_cbor_string()))
{
return false;
}
if (not parse_cbor_sax_internal(sax))
{
return false;
}
}
}
else
{
while (get() != 0xFF)
{
if (not sax->key(get_cbor_string()))
{
return false;
}
if (not parse_cbor_sax_internal(sax))
{
return false;
}
}
}
return sax->end_object();
} }
/*! /*!
@ -6311,24 +6309,16 @@ class binary_reader
case 0xBD: case 0xBD:
case 0xBE: case 0xBE:
case 0xBF: case 0xBF:
{
return get_string(current & 0x1F); return get_string(current & 0x1F);
}
case 0xD9: // str 8 case 0xD9: // str 8
{
return get_string(get_number<uint8_t>()); return get_string(get_number<uint8_t>());
}
case 0xDA: // str 16 case 0xDA: // str 16
{
return get_string(get_number<uint16_t>()); return get_string(get_number<uint16_t>());
}
case 0xDB: // str 32 case 0xDB: // str 32
{
return get_string(get_number<uint32_t>()); return get_string(get_number<uint32_t>());
}
default: default:
{ {
@ -6340,31 +6330,46 @@ class binary_reader
} }
} }
template<typename NumberType> bool get_msgpack_array_sax(json_sax_t* sax, const std::size_t len)
BasicJsonType get_msgpack_array(const NumberType len)
{ {
BasicJsonType result = value_t::array; if (not sax->start_array(len))
std::generate_n(std::back_inserter(*result.m_value.array), len, [this]()
{ {
return parse_msgpack_internal(); return false;
}); }
return result;
for (std::size_t i = 0; i < len; ++i)
{
if (not parse_msgpack_sax_internal(sax))
{
return false;
}
}
return sax->end_array();
} }
template<typename NumberType> bool get_msgpack_object_sax(json_sax_t* sax, const std::size_t len)
BasicJsonType get_msgpack_object(const NumberType len)
{ {
BasicJsonType result = value_t::object; if (not sax->start_object(len))
std::generate_n(std::inserter(*result.m_value.object, {
result.m_value.object->end()), return false;
len, [this]() }
for (std::size_t i = 0; i < len; ++i)
{ {
get(); get();
auto key = get_msgpack_string(); if (not sax->key(get_msgpack_string()))
auto val = parse_msgpack_internal(); {
return std::make_pair(std::move(key), std::move(val)); return false;
}); }
return result;
if (not parse_msgpack_sax_internal(sax))
{
return false;
}
}
return sax->end_object();
} }
/*! /*!
@ -6412,6 +6417,25 @@ class binary_reader
} }
} }
std::size_t get_ubjson_size_value()
{
switch (get_ignore_noop())
{
case 'U':
return static_cast<std::size_t>(get_number<uint8_t>());
case 'i':
return static_cast<std::size_t>(get_number<int8_t>());
case 'I':
return static_cast<std::size_t>(get_number<int16_t>());
case 'l':
return static_cast<std::size_t>(get_number<int32_t>());
case 'L':
return static_cast<std::size_t>(get_number<int64_t>());
default:
return std::size_t(-1);
}
}
/*! /*!
@brief determine the type and size for a container @brief determine the type and size for a container
@ -6440,45 +6464,46 @@ class binary_reader
JSON_THROW(parse_error::create(112, chars_read, JSON_THROW(parse_error::create(112, chars_read,
"expected '#' after UBJSON type information; last byte: 0x" + ss.str())); "expected '#' after UBJSON type information; last byte: 0x" + ss.str()));
} }
sz = parse_ubjson_internal(); sz = get_ubjson_size_value();
} }
else if (current == '#') else if (current == '#')
{ {
sz = parse_ubjson_internal(); sz = get_ubjson_size_value();
} }
return std::make_pair(sz, tc); return std::make_pair(sz, tc);
} }
BasicJsonType get_ubjson_value(const int prefix) bool get_ubjson_sax_value(json_sax_t* sax, const int prefix)
{ {
switch (prefix) switch (prefix)
{ {
case std::char_traits<char>::eof(): // EOF case std::char_traits<char>::eof(): // EOF
JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); unexpect_eof();
return sax->parse_error(chars_read, "<end of file>", "unexpected end of input");
case 'T': // true case 'T': // true
return true; return sax->boolean(true);
case 'F': // false case 'F': // false
return false; return sax->boolean(false);
case 'Z': // null case 'Z': // null
return nullptr; return sax->null();
case 'U': case 'U':
return get_number<uint8_t>(); return sax->number_unsigned(get_number<uint8_t>());
case 'i': case 'i':
return get_number<int8_t>(); return sax->number_integer(get_number<int8_t>());
case 'I': case 'I':
return get_number<int16_t>(); return sax->number_integer(get_number<int16_t>());
case 'l': case 'l':
return get_number<int32_t>(); return sax->number_integer(get_number<int32_t>());
case 'L': case 'L':
return get_number<int64_t>(); return sax->number_integer(get_number<int64_t>());
case 'd': case 'd':
return get_number<float>(); return sax->number_float(static_cast<double>(get_number<float>()), "");
case 'D': case 'D':
return get_number<double>(); return sax->number_float(get_number<double>(), "");
case 'C': // char case 'C': // char
{ {
@ -6491,17 +6516,17 @@ class binary_reader
JSON_THROW(parse_error::create(113, chars_read, JSON_THROW(parse_error::create(113, chars_read,
"byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + ss.str())); "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + ss.str()));
} }
return string_t(1, static_cast<char>(current)); return sax->string(string_t(1, static_cast<char>(current)));
} }
case 'S': // string case 'S': // string
return get_ubjson_string(); return sax->string(get_ubjson_string());
case '[': // array case '[': // array
return get_ubjson_array(); return get_ubjson_sax_array(sax);
case '{': // object case '{': // object
return get_ubjson_object(); return get_ubjson_sax_object(sax);
default: // anything else default: // anything else
std::stringstream ss; std::stringstream ss;
@ -6511,98 +6536,123 @@ class binary_reader
} }
} }
BasicJsonType get_ubjson_array() bool get_ubjson_sax_array(json_sax_t* sax)
{ {
BasicJsonType result = value_t::array;
const auto size_and_type = get_ubjson_size_type(); const auto size_and_type = get_ubjson_size_type();
if (size_and_type.first != string_t::npos) if (size_and_type.first != string_t::npos)
{ {
if (JSON_UNLIKELY(size_and_type.first > result.max_size())) if (not sax->start_array(size_and_type.first))
{ {
JSON_THROW(out_of_range::create(408, return false;
"excessive array size: " + std::to_string(size_and_type.first)));
} }
if (size_and_type.second != 0) if (size_and_type.second != 0)
{ {
if (size_and_type.second != 'N') if (size_and_type.second != 'N')
{ {
std::generate_n(std::back_inserter(*result.m_value.array), for (std::size_t i = 0; i < size_and_type.first; ++i)
size_and_type.first, [this, size_and_type]()
{ {
return get_ubjson_value(size_and_type.second); if (not get_ubjson_sax_value(sax, size_and_type.second))
}); {
return false;
}
}
} }
} }
else else
{ {
std::generate_n(std::back_inserter(*result.m_value.array), for (std::size_t i = 0; i < size_and_type.first; ++i)
size_and_type.first, [this]()
{ {
return parse_ubjson_internal(); if (not parse_ubjson_sax_internal(sax))
}); {
return false;
}
}
} }
} }
else else
{ {
if (not sax->start_array())
{
return false;
}
while (current != ']') while (current != ']')
{ {
result.push_back(parse_ubjson_internal(false)); if (not parse_ubjson_sax_internal(sax, false))
{
return false;
}
get_ignore_noop(); get_ignore_noop();
} }
} }
return result; return sax->end_array();
} }
BasicJsonType get_ubjson_object() bool get_ubjson_sax_object(json_sax_t* sax)
{ {
BasicJsonType result = value_t::object;
const auto size_and_type = get_ubjson_size_type(); const auto size_and_type = get_ubjson_size_type();
if (size_and_type.first != string_t::npos) if (size_and_type.first != string_t::npos)
{ {
if (JSON_UNLIKELY(size_and_type.first > result.max_size())) if (not sax->start_object(size_and_type.first))
{ {
JSON_THROW(out_of_range::create(408, return false;
"excessive object size: " + std::to_string(size_and_type.first)));
} }
if (size_and_type.second != 0) if (size_and_type.second != 0)
{ {
std::generate_n(std::inserter(*result.m_value.object, for (std::size_t i = 0; i < size_and_type.first; ++i)
result.m_value.object->end()),
size_and_type.first, [this, size_and_type]()
{ {
auto key = get_ubjson_string(); if (not sax->key(get_ubjson_string()))
auto val = get_ubjson_value(size_and_type.second); {
return std::make_pair(std::move(key), std::move(val)); return false;
}); }
if (not get_ubjson_sax_value(sax, size_and_type.second))
{
return false;
}
}
} }
else else
{ {
std::generate_n(std::inserter(*result.m_value.object, for (std::size_t i = 0; i < size_and_type.first; ++i)
result.m_value.object->end()),
size_and_type.first, [this]()
{ {
auto key = get_ubjson_string(); if (not sax->key(get_ubjson_string()))
auto val = parse_ubjson_internal(); {
return std::make_pair(std::move(key), std::move(val)); return false;
}); }
if (not parse_ubjson_sax_internal(sax))
{
return false;
}
}
} }
} }
else else
{ {
if (not sax->start_object())
{
return false;
}
while (current != '}') while (current != '}')
{ {
auto key = get_ubjson_string(false); if (not sax->key(get_ubjson_string(false)))
result[std::move(key)] = parse_ubjson_internal(); {
return false;
}
if (not parse_ubjson_sax_internal(sax))
{
return false;
}
get_ignore_noop(); get_ignore_noop();
} }
} }
return result; return sax->end_object();
} }
/*! /*!
@ -10225,7 +10275,7 @@ class basic_json
template<typename BasicJsonType> template<typename BasicJsonType>
friend class ::nlohmann::detail::binary_reader; friend class ::nlohmann::detail::binary_reader;
template<typename BasicJsonType> template<typename BasicJsonType>
friend class ::nlohmann::json_sax_dom_parser; friend class ::nlohmann::detail::json_sax_dom_parser;
/// workaround type for MSVC /// workaround type for MSVC
using basic_json_t = NLOHMANN_BASIC_JSON_TPL; using basic_json_t = NLOHMANN_BASIC_JSON_TPL;

View file

@ -149,7 +149,7 @@ json parser_helper(const std::string& s)
CHECK(j_nothrow == j); CHECK(j_nothrow == j);
json j_sax; json j_sax;
nlohmann::json_sax_dom_parser<json> sdp(j_sax); nlohmann::detail::json_sax_dom_parser<json> sdp(j_sax);
json::sax_parse(s, &sdp); json::sax_parse(s, &sdp);
CHECK(j_sax == j); CHECK(j_sax == j);