Add binary type support to all binary file formats, as well as an internally represented binary type
This commit is contained in:
parent
6121fc52cf
commit
012c9665ac
21 changed files with 3008 additions and 106 deletions
|
@ -38,6 +38,7 @@ class binary_reader
|
|||
using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
|
||||
using number_float_t = typename BasicJsonType::number_float_t;
|
||||
using string_t = typename BasicJsonType::string_t;
|
||||
using internal_binary_t = typename BasicJsonType::internal_binary_t;
|
||||
using json_sax_t = SAX;
|
||||
|
||||
public:
|
||||
|
@ -207,6 +208,30 @@ class binary_reader
|
|||
return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) and get() != std::char_traits<char>::eof();
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief Parses a byte array input of length @a len from the BSON input.
|
||||
@param[in] len The length of the byte array to be read.
|
||||
@param[in, out] result A reference to the binary variable where the read
|
||||
array is to be stored.
|
||||
@tparam NumberType The type of the length @a len
|
||||
@pre len >= 0
|
||||
@return `true` if the byte array was successfully parsed
|
||||
*/
|
||||
template<typename NumberType>
|
||||
bool get_bson_binary(const NumberType len, internal_binary_t& result)
|
||||
{
|
||||
if (JSON_HEDLEY_UNLIKELY(len < 0))
|
||||
{
|
||||
auto last_token = get_token_string();
|
||||
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "byte array length cannot be negative, is " + std::to_string(len), "binary")));
|
||||
}
|
||||
|
||||
result.has_subtype = true; // All BSON binary values have a subtype
|
||||
get_number<std::uint8_t>(input_format_t::bson, result.subtype);
|
||||
|
||||
return get_binary(input_format_t::bson, len, result);
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief Read a BSON document element of the given @a element_type.
|
||||
@param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html
|
||||
|
@ -245,6 +270,13 @@ class binary_reader
|
|||
return parse_bson_array();
|
||||
}
|
||||
|
||||
case 0x05: // binary
|
||||
{
|
||||
std::int32_t len;
|
||||
internal_binary_t value;
|
||||
return get_number<std::int32_t, true>(input_format_t::bson, len) and get_bson_binary(len, value) and sax->binary(value);
|
||||
}
|
||||
|
||||
case 0x08: // boolean
|
||||
{
|
||||
return sax->boolean(get() != 0);
|
||||
|
@ -291,6 +323,7 @@ class binary_reader
|
|||
bool parse_bson_element_list(const bool is_array)
|
||||
{
|
||||
string_t key;
|
||||
|
||||
while (int element_type = get())
|
||||
{
|
||||
if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list")))
|
||||
|
@ -465,6 +498,41 @@ class binary_reader
|
|||
- static_cast<number_integer_t>(number));
|
||||
}
|
||||
|
||||
// Binary data (0x00..0x17 bytes follow)
|
||||
case 0x40:
|
||||
case 0x41:
|
||||
case 0x42:
|
||||
case 0x43:
|
||||
case 0x44:
|
||||
case 0x45:
|
||||
case 0x46:
|
||||
case 0x47:
|
||||
case 0x48:
|
||||
case 0x49:
|
||||
case 0x4A:
|
||||
case 0x4B:
|
||||
case 0x4C:
|
||||
case 0x4D:
|
||||
case 0x4E:
|
||||
case 0x4F:
|
||||
case 0x50:
|
||||
case 0x51:
|
||||
case 0x52:
|
||||
case 0x53:
|
||||
case 0x54:
|
||||
case 0x55:
|
||||
case 0x56:
|
||||
case 0x57:
|
||||
case 0x58: // Binary data (one-byte uint8_t for n follows)
|
||||
case 0x59: // Binary data (two-byte uint16_t for n follow)
|
||||
case 0x5A: // Binary data (four-byte uint32_t for n follow)
|
||||
case 0x5B: // Binary data (eight-byte uint64_t for n follow)
|
||||
case 0x5F: // Binary data (indefinite length)
|
||||
{
|
||||
internal_binary_t b;
|
||||
return get_cbor_binary(b) and sax->binary(b);
|
||||
}
|
||||
|
||||
// UTF-8 string (0x00..0x17 bytes follow)
|
||||
case 0x60:
|
||||
case 0x61:
|
||||
|
@ -780,6 +848,101 @@ class binary_reader
|
|||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief reads a CBOR byte array
|
||||
|
||||
This function first reads starting bytes to determine the expected
|
||||
byte array length and then copies this number of bytes into the byte array.
|
||||
Additionally, CBOR's byte arrays with indefinite lengths are supported.
|
||||
|
||||
@param[out] result created byte array
|
||||
|
||||
@return whether byte array creation completed
|
||||
*/
|
||||
bool get_cbor_binary(internal_binary_t& result)
|
||||
{
|
||||
if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::cbor, "binary")))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (current)
|
||||
{
|
||||
// Binary data (0x00..0x17 bytes follow)
|
||||
case 0x40:
|
||||
case 0x41:
|
||||
case 0x42:
|
||||
case 0x43:
|
||||
case 0x44:
|
||||
case 0x45:
|
||||
case 0x46:
|
||||
case 0x47:
|
||||
case 0x48:
|
||||
case 0x49:
|
||||
case 0x4A:
|
||||
case 0x4B:
|
||||
case 0x4C:
|
||||
case 0x4D:
|
||||
case 0x4E:
|
||||
case 0x4F:
|
||||
case 0x50:
|
||||
case 0x51:
|
||||
case 0x52:
|
||||
case 0x53:
|
||||
case 0x54:
|
||||
case 0x55:
|
||||
case 0x56:
|
||||
case 0x57:
|
||||
{
|
||||
return get_binary(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
|
||||
}
|
||||
|
||||
case 0x58: // Binary data (one-byte uint8_t for n follows)
|
||||
{
|
||||
std::uint8_t len;
|
||||
return get_number(input_format_t::cbor, len) and get_binary(input_format_t::cbor, len, result);
|
||||
}
|
||||
|
||||
case 0x59: // Binary data (two-byte uint16_t for n follow)
|
||||
{
|
||||
std::uint16_t len;
|
||||
return get_number(input_format_t::cbor, len) and get_binary(input_format_t::cbor, len, result);
|
||||
}
|
||||
|
||||
case 0x5A: // Binary data (four-byte uint32_t for n follow)
|
||||
{
|
||||
std::uint32_t len;
|
||||
return get_number(input_format_t::cbor, len) and get_binary(input_format_t::cbor, len, result);
|
||||
}
|
||||
|
||||
case 0x5B: // Binary data (eight-byte uint64_t for n follow)
|
||||
{
|
||||
std::uint64_t len;
|
||||
return get_number(input_format_t::cbor, len) and get_binary(input_format_t::cbor, len, result);
|
||||
}
|
||||
|
||||
case 0x5F: // Binary data (indefinite length)
|
||||
{
|
||||
while (get() != 0xFF)
|
||||
{
|
||||
internal_binary_t chunk;
|
||||
if (not get_cbor_binary(chunk))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
result.insert(result.end(), chunk.begin(), chunk.end());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
auto last_token = get_token_string();
|
||||
return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x40-0x5B) or indefinite binary array type (0x5F); last byte: 0x" + last_token, "binary")));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
@param[in] len the length of the array or std::size_t(-1) for an
|
||||
array of indefinite size
|
||||
|
@ -1100,6 +1263,22 @@ class binary_reader
|
|||
case 0xC3: // true
|
||||
return sax->boolean(true);
|
||||
|
||||
case 0xC4: // bin 8
|
||||
case 0xC5: // bin 16
|
||||
case 0xC6: // bin 32
|
||||
case 0xC7: // ext 8
|
||||
case 0xC8: // ext 16
|
||||
case 0xC9: // ext 32
|
||||
case 0xD4: // fixext 1
|
||||
case 0xD5: // fixext 2
|
||||
case 0xD6: // fixext 4
|
||||
case 0xD7: // fixext 8
|
||||
case 0xD8: // fixext 16
|
||||
{
|
||||
internal_binary_t b;
|
||||
return get_msgpack_binary(b) and sax->binary(b);
|
||||
}
|
||||
|
||||
case 0xCA: // float 32
|
||||
{
|
||||
float number;
|
||||
|
@ -1309,6 +1488,108 @@ class binary_reader
|
|||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief reads a MessagePack byte array
|
||||
|
||||
This function first reads starting bytes to determine the expected
|
||||
byte array length and then copies this number of bytes into a byte array.
|
||||
|
||||
@param[out] result created byte array
|
||||
|
||||
@return whether byte array creation completed
|
||||
*/
|
||||
bool get_msgpack_binary(internal_binary_t& result)
|
||||
{
|
||||
if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::msgpack, "binary")))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (current)
|
||||
{
|
||||
case 0xC4: // bin 8
|
||||
{
|
||||
std::uint8_t len;
|
||||
return get_number(input_format_t::msgpack, len) and get_binary(input_format_t::msgpack, len, result);
|
||||
}
|
||||
|
||||
case 0xC5: // bin 16
|
||||
{
|
||||
std::uint16_t len;
|
||||
return get_number(input_format_t::msgpack, len) and get_binary(input_format_t::msgpack, len, result);
|
||||
}
|
||||
|
||||
case 0xC6: // bin 32
|
||||
{
|
||||
std::uint32_t len;
|
||||
return get_number(input_format_t::msgpack, len) and get_binary(input_format_t::msgpack, len, result);
|
||||
}
|
||||
|
||||
case 0xC7: // ext 8
|
||||
{
|
||||
std::uint8_t len;
|
||||
result.has_subtype = true;
|
||||
return get_number(input_format_t::msgpack, len) and
|
||||
get_number(input_format_t::msgpack, result.subtype) and
|
||||
get_binary(input_format_t::msgpack, len, result);
|
||||
}
|
||||
|
||||
case 0xC8: // ext 16
|
||||
{
|
||||
std::uint16_t len;
|
||||
result.has_subtype = true;
|
||||
return get_number(input_format_t::msgpack, len) and
|
||||
get_number(input_format_t::msgpack, result.subtype) and
|
||||
get_binary(input_format_t::msgpack, len, result);
|
||||
}
|
||||
|
||||
case 0xC9: // ext 32
|
||||
{
|
||||
std::uint32_t len;
|
||||
result.has_subtype = true;
|
||||
return get_number(input_format_t::msgpack, len) and
|
||||
get_number(input_format_t::msgpack, result.subtype) and
|
||||
get_binary(input_format_t::msgpack, len, result);
|
||||
}
|
||||
|
||||
case 0xD4: // fixext 1
|
||||
{
|
||||
result.has_subtype = true;
|
||||
return get_number(input_format_t::msgpack, result.subtype) and get_binary(input_format_t::msgpack, 1, result);
|
||||
}
|
||||
|
||||
case 0xD5: // fixext 2
|
||||
{
|
||||
result.has_subtype = true;
|
||||
return get_number(input_format_t::msgpack, result.subtype) and get_binary(input_format_t::msgpack, 2, result);
|
||||
}
|
||||
|
||||
case 0xD6: // fixext 4
|
||||
{
|
||||
result.has_subtype = true;
|
||||
return get_number(input_format_t::msgpack, result.subtype) and get_binary(input_format_t::msgpack, 4, result);
|
||||
}
|
||||
|
||||
case 0xD7: // fixext 8
|
||||
{
|
||||
result.has_subtype = true;
|
||||
return get_number(input_format_t::msgpack, result.subtype) and get_binary(input_format_t::msgpack, 8, result);
|
||||
}
|
||||
|
||||
case 0xD8: // fixext 16
|
||||
{
|
||||
result.has_subtype = true;
|
||||
return get_number(input_format_t::msgpack, result.subtype) and get_binary(input_format_t::msgpack, 16, result);
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
auto last_token = get_token_string();
|
||||
return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected binary type specification (0xC4-0xC9, 0xD4-0xD8); last byte: 0x" + last_token, "binary")));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
@param[in] len the length of the array
|
||||
@return whether array creation completed
|
||||
|
@ -1793,6 +2074,9 @@ class binary_reader
|
|||
return sax->end_object();
|
||||
}
|
||||
|
||||
// Note: no reader for UBJSON binary values is implemented because the
|
||||
// UBJSON format does not define a binary type.
|
||||
|
||||
///////////////////////
|
||||
// Utility functions //
|
||||
///////////////////////
|
||||
|
@ -1900,6 +2184,38 @@ class binary_reader
|
|||
return success;
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief create a byte array by reading bytes from the input
|
||||
|
||||
@tparam NumberType the type of the number
|
||||
@param[in] format the current format (for diagnostics)
|
||||
@param[in] len number of bytes to read
|
||||
@param[out] result byte array created by reading @a len bytes
|
||||
|
||||
@return whether byte array creation completed
|
||||
|
||||
@note We can not reserve @a len bytes for the result, because @a len
|
||||
may be too large. Usually, @ref unexpect_eof() detects the end of
|
||||
the input before we run out of memory.
|
||||
*/
|
||||
template<typename NumberType>
|
||||
bool get_binary(const input_format_t format,
|
||||
const NumberType len,
|
||||
internal_binary_t& result)
|
||||
{
|
||||
bool success = true;
|
||||
std::generate_n(std::back_inserter(result), len, [this, &success, &format]()
|
||||
{
|
||||
get();
|
||||
if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(format, "binary")))
|
||||
{
|
||||
success = false;
|
||||
}
|
||||
return static_cast<uint8_t>(current);
|
||||
});
|
||||
return success;
|
||||
}
|
||||
|
||||
/*!
|
||||
@param[in] format the current format (for diagnostics)
|
||||
@param[in] context further context information (for diagnostics)
|
||||
|
|
|
@ -31,6 +31,7 @@ struct json_sax
|
|||
using number_float_t = typename BasicJsonType::number_float_t;
|
||||
/// type for strings
|
||||
using string_t = typename BasicJsonType::string_t;
|
||||
using binary_t = typename BasicJsonType::binary_t;
|
||||
|
||||
/*!
|
||||
@brief a null value was read
|
||||
|
@ -75,6 +76,14 @@ struct json_sax
|
|||
*/
|
||||
virtual bool string(string_t& val) = 0;
|
||||
|
||||
/*!
|
||||
@brief a binary string was read
|
||||
@param[in] val binary value
|
||||
@return whether parsing should proceed
|
||||
@note It is safe to move the passed binary.
|
||||
*/
|
||||
virtual bool binary(binary_t& val) = 0;
|
||||
|
||||
/*!
|
||||
@brief the beginning of an object was read
|
||||
@param[in] elements number of object elements or -1 if unknown
|
||||
|
@ -149,6 +158,7 @@ class json_sax_dom_parser
|
|||
using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
|
||||
using number_float_t = typename BasicJsonType::number_float_t;
|
||||
using string_t = typename BasicJsonType::string_t;
|
||||
using binary_t = typename BasicJsonType::binary_t;
|
||||
|
||||
/*!
|
||||
@param[in, out] r reference to a JSON value that is manipulated while
|
||||
|
@ -202,6 +212,12 @@ class json_sax_dom_parser
|
|||
return true;
|
||||
}
|
||||
|
||||
/*!
@brief a binary value was read; store it in the JSON value being built
@param[in] val  binary value; its contents are moved from
@return `true` (parsing always proceeds)
*/
bool binary(binary_t& val)
{
    // The SAX interface documents the passed binary as safe to move, so
    // hand it on as an rvalue; passing the lvalue would make the forwarding
    // handle_binary() copy the whole buffer.
    handle_binary(std::move(val));
    return true;
}
|
||||
|
||||
bool start_object(std::size_t len)
|
||||
{
|
||||
ref_stack.push_back(handle_value(BasicJsonType::value_t::object));
|
||||
|
@ -311,6 +327,36 @@ class json_sax_dom_parser
|
|||
return object_element;
|
||||
}
|
||||
|
||||
/*!
|
||||
@invariant If the ref stack is empty, then the passed value will be the new
|
||||
root.
|
||||
@invariant If the ref stack contains a value, then it is an array or an
|
||||
object to which we can add elements
|
||||
*/
|
||||
template<typename BinaryValue>
|
||||
JSON_HEDLEY_RETURNS_NON_NULL
|
||||
BasicJsonType* handle_binary(BinaryValue&& v)
|
||||
{
|
||||
if (ref_stack.empty())
|
||||
{
|
||||
root = BasicJsonType::binary_array(std::forward<BinaryValue>(v));
|
||||
return &root;
|
||||
}
|
||||
|
||||
assert(ref_stack.back()->is_array() or ref_stack.back()->is_object());
|
||||
|
||||
if (ref_stack.back()->is_array())
|
||||
{
|
||||
ref_stack.back()->m_value.array->emplace_back(BasicJsonType::binary_array(std::forward<BinaryValue>(v)));
|
||||
return &(ref_stack.back()->m_value.array->back());
|
||||
}
|
||||
|
||||
assert(ref_stack.back()->is_object());
|
||||
assert(object_element);
|
||||
*object_element = BasicJsonType::binary_array(std::forward<BinaryValue>(v));
|
||||
return object_element;
|
||||
}
|
||||
|
||||
/// the parsed JSON value
|
||||
BasicJsonType& root;
|
||||
/// stack to model hierarchy of values
|
||||
|
@ -331,6 +377,7 @@ class json_sax_dom_callback_parser
|
|||
using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
|
||||
using number_float_t = typename BasicJsonType::number_float_t;
|
||||
using string_t = typename BasicJsonType::string_t;
|
||||
using binary_t = typename BasicJsonType::binary_t;
|
||||
using parser_callback_t = typename BasicJsonType::parser_callback_t;
|
||||
using parse_event_t = typename BasicJsonType::parse_event_t;
|
||||
|
||||
|
@ -385,6 +432,12 @@ class json_sax_dom_callback_parser
|
|||
return true;
|
||||
}
|
||||
|
||||
// SAX event: a binary value was read. Passes it to handle_value() and
// always returns true so that parsing proceeds.
// NOTE(review): json_sax_dom_parser wraps the value with
// BasicJsonType::binary_array() before storing it, whereas here the raw
// binary_t is handed to handle_value() - confirm that handle_value() yields
// a binary JSON value for this argument type.
bool binary(binary_t& val)
|
||||
{
|
||||
handle_value(val);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool start_object(std::size_t len)
|
||||
{
|
||||
// check callback for object start
|
||||
|
@ -635,6 +688,7 @@ class json_sax_acceptor
|
|||
using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
|
||||
using number_float_t = typename BasicJsonType::number_float_t;
|
||||
using string_t = typename BasicJsonType::string_t;
|
||||
using binary_t = typename BasicJsonType::binary_t;
|
||||
|
||||
bool null()
|
||||
{
|
||||
|
@ -666,7 +720,12 @@ class json_sax_acceptor
|
|||
return true;
|
||||
}
|
||||
|
||||
bool start_object(std::size_t /*unused*/ = std::size_t(-1))
|
||||
// SAX event: a binary value was read. The value is ignored and true is
// returned unconditionally.
bool binary(binary_t& /*unused*/)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
// SAX event: the beginning of an object was read. The element count is
// ignored and true is returned unconditionally.
bool start_object(std::size_t /*unused*/ = std::size_t(-1))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
@ -681,7 +740,7 @@ class json_sax_acceptor
|
|||
return true;
|
||||
}
|
||||
|
||||
bool start_array(std::size_t /*unused*/ = std::size_t(-1))
|
||||
// SAX event: the beginning of an array was read. The element count is
// ignored and true is returned unconditionally.
bool start_array(std::size_t /*unused*/ = std::size_t(-1))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue