♻️ reorganized code

This commit is contained in:
Niels Lohmann 2018-01-09 18:30:02 +01:00
parent b67e00b9b5
commit 0a2920e0fd
No known key found for this signature in database
GPG key ID: 7F3CEA63AE251B69
27 changed files with 19205 additions and 19134 deletions

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,558 @@
#ifndef NLOHMANN_JSON_DETAIL_PARSING_BINARY_WRITER_HPP
#define NLOHMANN_JSON_DETAIL_PARSING_BINARY_WRITER_HPP
#include <algorithm> // reverse
#include <array> // array
#include <cassert> // assert
#include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t
#include <cstring> // memcpy
#include <limits> // numeric_limits
#include <utility> // move
#include "detail/parsing/binary_reader.hpp"
#include "detail/parsing/output_adapters.hpp"
namespace nlohmann
{
namespace detail
{
///////////////////
// binary writer //
///////////////////
/*!
@brief serialization to CBOR and MessagePack values
*/
template<typename BasicJsonType, typename CharType>
class binary_writer
{
public:
/*!
@brief create a binary writer

@param[in] adapter  output adapter to write to; must not be null
*/
explicit binary_writer(output_adapter_t<CharType> adapter) : oa(std::move(adapter))
{
    // every write dereferences oa, so a null adapter is a programming error
    assert(oa);
}
/*!
@brief[in] j JSON value to serialize
*/
void write_cbor(const BasicJsonType& j)
{
switch (j.type())
{
case value_t::null:
{
oa->write_character(static_cast<CharType>(0xF6));
break;
}
case value_t::boolean:
{
oa->write_character(j.m_value.boolean
? static_cast<CharType>(0xF5)
: static_cast<CharType>(0xF4));
break;
}
case value_t::number_integer:
{
if (j.m_value.number_integer >= 0)
{
// CBOR does not differentiate between positive signed
// integers and unsigned integers. Therefore, we used the
// code from the value_t::number_unsigned case here.
if (j.m_value.number_integer <= 0x17)
{
write_number(static_cast<uint8_t>(j.m_value.number_integer));
}
else if (j.m_value.number_integer <= (std::numeric_limits<uint8_t>::max)())
{
oa->write_character(static_cast<CharType>(0x18));
write_number(static_cast<uint8_t>(j.m_value.number_integer));
}
else if (j.m_value.number_integer <= (std::numeric_limits<uint16_t>::max)())
{
oa->write_character(static_cast<CharType>(0x19));
write_number(static_cast<uint16_t>(j.m_value.number_integer));
}
else if (j.m_value.number_integer <= (std::numeric_limits<uint32_t>::max)())
{
oa->write_character(static_cast<CharType>(0x1A));
write_number(static_cast<uint32_t>(j.m_value.number_integer));
}
else
{
oa->write_character(static_cast<CharType>(0x1B));
write_number(static_cast<uint64_t>(j.m_value.number_integer));
}
}
else
{
// The conversions below encode the sign in the first
// byte, and the value is converted to a positive number.
const auto positive_number = -1 - j.m_value.number_integer;
if (j.m_value.number_integer >= -24)
{
write_number(static_cast<uint8_t>(0x20 + positive_number));
}
else if (positive_number <= (std::numeric_limits<uint8_t>::max)())
{
oa->write_character(static_cast<CharType>(0x38));
write_number(static_cast<uint8_t>(positive_number));
}
else if (positive_number <= (std::numeric_limits<uint16_t>::max)())
{
oa->write_character(static_cast<CharType>(0x39));
write_number(static_cast<uint16_t>(positive_number));
}
else if (positive_number <= (std::numeric_limits<uint32_t>::max)())
{
oa->write_character(static_cast<CharType>(0x3A));
write_number(static_cast<uint32_t>(positive_number));
}
else
{
oa->write_character(static_cast<CharType>(0x3B));
write_number(static_cast<uint64_t>(positive_number));
}
}
break;
}
case value_t::number_unsigned:
{
if (j.m_value.number_unsigned <= 0x17)
{
write_number(static_cast<uint8_t>(j.m_value.number_unsigned));
}
else if (j.m_value.number_unsigned <= (std::numeric_limits<uint8_t>::max)())
{
oa->write_character(static_cast<CharType>(0x18));
write_number(static_cast<uint8_t>(j.m_value.number_unsigned));
}
else if (j.m_value.number_unsigned <= (std::numeric_limits<uint16_t>::max)())
{
oa->write_character(static_cast<CharType>(0x19));
write_number(static_cast<uint16_t>(j.m_value.number_unsigned));
}
else if (j.m_value.number_unsigned <= (std::numeric_limits<uint32_t>::max)())
{
oa->write_character(static_cast<CharType>(0x1A));
write_number(static_cast<uint32_t>(j.m_value.number_unsigned));
}
else
{
oa->write_character(static_cast<CharType>(0x1B));
write_number(static_cast<uint64_t>(j.m_value.number_unsigned));
}
break;
}
case value_t::number_float: // Double-Precision Float
{
oa->write_character(static_cast<CharType>(0xFB));
write_number(j.m_value.number_float);
break;
}
case value_t::string:
{
// step 1: write control byte and the string length
const auto N = j.m_value.string->size();
if (N <= 0x17)
{
write_number(static_cast<uint8_t>(0x60 + N));
}
else if (N <= 0xFF)
{
oa->write_character(static_cast<CharType>(0x78));
write_number(static_cast<uint8_t>(N));
}
else if (N <= 0xFFFF)
{
oa->write_character(static_cast<CharType>(0x79));
write_number(static_cast<uint16_t>(N));
}
else if (N <= 0xFFFFFFFF)
{
oa->write_character(static_cast<CharType>(0x7A));
write_number(static_cast<uint32_t>(N));
}
// LCOV_EXCL_START
else if (N <= 0xFFFFFFFFFFFFFFFF)
{
oa->write_character(static_cast<CharType>(0x7B));
write_number(static_cast<uint64_t>(N));
}
// LCOV_EXCL_STOP
// step 2: write the string
oa->write_characters(
reinterpret_cast<const CharType*>(j.m_value.string->c_str()),
j.m_value.string->size());
break;
}
case value_t::array:
{
// step 1: write control byte and the array size
const auto N = j.m_value.array->size();
if (N <= 0x17)
{
write_number(static_cast<uint8_t>(0x80 + N));
}
else if (N <= 0xFF)
{
oa->write_character(static_cast<CharType>(0x98));
write_number(static_cast<uint8_t>(N));
}
else if (N <= 0xFFFF)
{
oa->write_character(static_cast<CharType>(0x99));
write_number(static_cast<uint16_t>(N));
}
else if (N <= 0xFFFFFFFF)
{
oa->write_character(static_cast<CharType>(0x9A));
write_number(static_cast<uint32_t>(N));
}
// LCOV_EXCL_START
else if (N <= 0xFFFFFFFFFFFFFFFF)
{
oa->write_character(static_cast<CharType>(0x9B));
write_number(static_cast<uint64_t>(N));
}
// LCOV_EXCL_STOP
// step 2: write each element
for (const auto& el : *j.m_value.array)
{
write_cbor(el);
}
break;
}
case value_t::object:
{
// step 1: write control byte and the object size
const auto N = j.m_value.object->size();
if (N <= 0x17)
{
write_number(static_cast<uint8_t>(0xA0 + N));
}
else if (N <= 0xFF)
{
oa->write_character(static_cast<CharType>(0xB8));
write_number(static_cast<uint8_t>(N));
}
else if (N <= 0xFFFF)
{
oa->write_character(static_cast<CharType>(0xB9));
write_number(static_cast<uint16_t>(N));
}
else if (N <= 0xFFFFFFFF)
{
oa->write_character(static_cast<CharType>(0xBA));
write_number(static_cast<uint32_t>(N));
}
// LCOV_EXCL_START
else if (N <= 0xFFFFFFFFFFFFFFFF)
{
oa->write_character(static_cast<CharType>(0xBB));
write_number(static_cast<uint64_t>(N));
}
// LCOV_EXCL_STOP
// step 2: write each element
for (const auto& el : *j.m_value.object)
{
write_cbor(el.first);
write_cbor(el.second);
}
break;
}
default:
break;
}
}
/*!
@brief[in] j JSON value to serialize
*/
void write_msgpack(const BasicJsonType& j)
{
switch (j.type())
{
case value_t::null: // nil
{
oa->write_character(static_cast<CharType>(0xC0));
break;
}
case value_t::boolean: // true and false
{
oa->write_character(j.m_value.boolean
? static_cast<CharType>(0xC3)
: static_cast<CharType>(0xC2));
break;
}
case value_t::number_integer:
{
if (j.m_value.number_integer >= 0)
{
// MessagePack does not differentiate between positive
// signed integers and unsigned integers. Therefore, we used
// the code from the value_t::number_unsigned case here.
if (j.m_value.number_unsigned < 128)
{
// positive fixnum
write_number(static_cast<uint8_t>(j.m_value.number_integer));
}
else if (j.m_value.number_unsigned <= (std::numeric_limits<uint8_t>::max)())
{
// uint 8
oa->write_character(static_cast<CharType>(0xCC));
write_number(static_cast<uint8_t>(j.m_value.number_integer));
}
else if (j.m_value.number_unsigned <= (std::numeric_limits<uint16_t>::max)())
{
// uint 16
oa->write_character(static_cast<CharType>(0xCD));
write_number(static_cast<uint16_t>(j.m_value.number_integer));
}
else if (j.m_value.number_unsigned <= (std::numeric_limits<uint32_t>::max)())
{
// uint 32
oa->write_character(static_cast<CharType>(0xCE));
write_number(static_cast<uint32_t>(j.m_value.number_integer));
}
else if (j.m_value.number_unsigned <= (std::numeric_limits<uint64_t>::max)())
{
// uint 64
oa->write_character(static_cast<CharType>(0xCF));
write_number(static_cast<uint64_t>(j.m_value.number_integer));
}
}
else
{
if (j.m_value.number_integer >= -32)
{
// negative fixnum
write_number(static_cast<int8_t>(j.m_value.number_integer));
}
else if (j.m_value.number_integer >= (std::numeric_limits<int8_t>::min)() and
j.m_value.number_integer <= (std::numeric_limits<int8_t>::max)())
{
// int 8
oa->write_character(static_cast<CharType>(0xD0));
write_number(static_cast<int8_t>(j.m_value.number_integer));
}
else if (j.m_value.number_integer >= (std::numeric_limits<int16_t>::min)() and
j.m_value.number_integer <= (std::numeric_limits<int16_t>::max)())
{
// int 16
oa->write_character(static_cast<CharType>(0xD1));
write_number(static_cast<int16_t>(j.m_value.number_integer));
}
else if (j.m_value.number_integer >= (std::numeric_limits<int32_t>::min)() and
j.m_value.number_integer <= (std::numeric_limits<int32_t>::max)())
{
// int 32
oa->write_character(static_cast<CharType>(0xD2));
write_number(static_cast<int32_t>(j.m_value.number_integer));
}
else if (j.m_value.number_integer >= (std::numeric_limits<int64_t>::min)() and
j.m_value.number_integer <= (std::numeric_limits<int64_t>::max)())
{
// int 64
oa->write_character(static_cast<CharType>(0xD3));
write_number(static_cast<int64_t>(j.m_value.number_integer));
}
}
break;
}
case value_t::number_unsigned:
{
if (j.m_value.number_unsigned < 128)
{
// positive fixnum
write_number(static_cast<uint8_t>(j.m_value.number_integer));
}
else if (j.m_value.number_unsigned <= (std::numeric_limits<uint8_t>::max)())
{
// uint 8
oa->write_character(static_cast<CharType>(0xCC));
write_number(static_cast<uint8_t>(j.m_value.number_integer));
}
else if (j.m_value.number_unsigned <= (std::numeric_limits<uint16_t>::max)())
{
// uint 16
oa->write_character(static_cast<CharType>(0xCD));
write_number(static_cast<uint16_t>(j.m_value.number_integer));
}
else if (j.m_value.number_unsigned <= (std::numeric_limits<uint32_t>::max)())
{
// uint 32
oa->write_character(static_cast<CharType>(0xCE));
write_number(static_cast<uint32_t>(j.m_value.number_integer));
}
else if (j.m_value.number_unsigned <= (std::numeric_limits<uint64_t>::max)())
{
// uint 64
oa->write_character(static_cast<CharType>(0xCF));
write_number(static_cast<uint64_t>(j.m_value.number_integer));
}
break;
}
case value_t::number_float: // float 64
{
oa->write_character(static_cast<CharType>(0xCB));
write_number(j.m_value.number_float);
break;
}
case value_t::string:
{
// step 1: write control byte and the string length
const auto N = j.m_value.string->size();
if (N <= 31)
{
// fixstr
write_number(static_cast<uint8_t>(0xA0 | N));
}
else if (N <= 255)
{
// str 8
oa->write_character(static_cast<CharType>(0xD9));
write_number(static_cast<uint8_t>(N));
}
else if (N <= 65535)
{
// str 16
oa->write_character(static_cast<CharType>(0xDA));
write_number(static_cast<uint16_t>(N));
}
else if (N <= 4294967295)
{
// str 32
oa->write_character(static_cast<CharType>(0xDB));
write_number(static_cast<uint32_t>(N));
}
// step 2: write the string
oa->write_characters(
reinterpret_cast<const CharType*>(j.m_value.string->c_str()),
j.m_value.string->size());
break;
}
case value_t::array:
{
// step 1: write control byte and the array size
const auto N = j.m_value.array->size();
if (N <= 15)
{
// fixarray
write_number(static_cast<uint8_t>(0x90 | N));
}
else if (N <= 0xFFFF)
{
// array 16
oa->write_character(static_cast<CharType>(0xDC));
write_number(static_cast<uint16_t>(N));
}
else if (N <= 0xFFFFFFFF)
{
// array 32
oa->write_character(static_cast<CharType>(0xDD));
write_number(static_cast<uint32_t>(N));
}
// step 2: write each element
for (const auto& el : *j.m_value.array)
{
write_msgpack(el);
}
break;
}
case value_t::object:
{
// step 1: write control byte and the object size
const auto N = j.m_value.object->size();
if (N <= 15)
{
// fixmap
write_number(static_cast<uint8_t>(0x80 | (N & 0xF)));
}
else if (N <= 65535)
{
// map 16
oa->write_character(static_cast<CharType>(0xDE));
write_number(static_cast<uint16_t>(N));
}
else if (N <= 4294967295)
{
// map 32
oa->write_character(static_cast<CharType>(0xDF));
write_number(static_cast<uint32_t>(N));
}
// step 2: write each element
for (const auto& el : *j.m_value.object)
{
write_msgpack(el.first);
write_msgpack(el.second);
}
break;
}
default:
break;
}
}
private:
/*
@brief write a number to output input
@param[in] n number of type @a NumberType
@tparam NumberType the type of the number
@note This function needs to respect the system's endianess, because bytes
in CBOR and MessagePack are stored in network order (big endian) and
therefore need reordering on little endian systems.
*/
template<typename NumberType> void write_number(NumberType n)
{
// step 1: write number to array of length NumberType
std::array<CharType, sizeof(NumberType)> vec;
std::memcpy(vec.data(), &n, sizeof(NumberType));
// step 2: write array to output (with possible reordering)
if (is_little_endian)
{
// reverse byte order prior to conversion if necessary
std::reverse(vec.begin(), vec.end());
}
oa->write_characters(vec.data(), sizeof(NumberType));
}
private:
/// whether we can assume little endianness; taken once from binary_reader and
/// consulted by write_number to decide whether bytes must be reversed
const bool is_little_endian = binary_reader<BasicJsonType>::little_endianess();
/// the output adapter all bytes are written to (asserted non-null by the
/// constructor)
output_adapter_t<CharType> oa = nullptr;
};
}
}
#endif

View file

@ -0,0 +1,265 @@
#ifndef NLOHMANN_JSON_DETAIL_PARSING_INPUT_ADAPTERS_HPP
#define NLOHMANN_JSON_DETAIL_PARSING_INPUT_ADAPTERS_HPP
#include <algorithm> // min
#include <array> // array
#include <cassert> // assert
#include <cstddef> // size_t
#include <cstring> // strlen
#include <ios> // streamsize, streamoff, streampos
#include <istream> // istream
#include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next
#include <memory> // shared_ptr, make_shared, addressof
#include <numeric> // accumulate
#include <string> // string, char_traits
#include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer
#include <utility> // pair, declval
#include "detail/macro_scope.hpp"
namespace nlohmann
{
namespace detail
{
////////////////////
// input adapters //
////////////////////
/*!
@brief abstract input adapter interface

Implementations turn a std::istream, a buffer, or some other input into a
stream of std::char_traits<char>::int_type characters and accept that exactly
one non-EOF character is handed back for future input. Every valid char value
is reported as a positive value (typically unsigned char); end of input is
reported as std::char_traits<char>::eof(), a value outside that range. This
value is typically -1, but could be any arbitrary value which is not a valid
char value.
*/
struct input_adapter_protocol
{
    virtual ~input_adapter_protocol() = default;

    /// get a character [0,255] or std::char_traits<char>::eof().
    virtual std::char_traits<char>::int_type get_character() = 0;

    /// restore the last non-eof() character to input
    virtual void unget_character() = 0;
};

/// a type to simplify interfaces
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
/*!
Input adapter for a (caching) istream. Ignores a UTF Byte Order Mark at
beginning of input. Does not support changing the underlying std::streambuf
in mid-input. Maintains underlying std::istream and std::streambuf to support
subsequent use of standard std::istream operations to process any input
characters following those used in parsing the JSON input. Clears the
std::istream flags; any input errors (e.g., EOF) will be detected by the first
subsequent call for input from the std::istream.
*/
class input_stream_adapter : public input_adapter_protocol
{
public:
/// resets the state flags of the wrapped stream when the adapter goes away
~input_stream_adapter() override
{
// clear stream flags; we use underlying streambuf I/O, do not
// maintain ifstream flags
is.clear();
}
/*!
@brief wrap a stream and skip a leading byte order mark (EF BB BF)

Up to three characters are read. Unless they form the complete mark, every
character read is given back to the stream, last one first, so that the
input is left as it was found.

@param[in] i  stream to read from; the adapter stores references to it and
              to its stream buffer
*/
explicit input_stream_adapter(std::istream& i)
: is(i), sb(*i.rdbuf())
{
// skip byte order mark
std::char_traits<char>::int_type c;
if ((c = get_character()) == 0xEF)
{
if ((c = get_character()) == 0xBB)
{
if ((c = get_character()) == 0xBF)
{
return; // Ignore BOM
}
else if (c != std::char_traits<char>::eof())
{
// third character is not part of a mark: give it back
is.unget();
}
// give back the second character of the incomplete mark
is.putback('\xBB');
}
else if (c != std::char_traits<char>::eof())
{
// second character is not part of a mark: give it back
is.unget();
}
// give back the first character of the incomplete mark
is.putback('\xEF');
}
else if (c != std::char_traits<char>::eof())
{
is.unget(); // no byte order mark; process as usual
}
}
// delete because of reference members
input_stream_adapter(const input_stream_adapter&) = delete;
input_stream_adapter& operator=(input_stream_adapter&) = delete;
// std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
// ensure that std::char_traits<char>::eof() and the character 0xFF do not
// end up as the same value, eg. 0xFFFFFFFF.
/// read the next character directly from the stream buffer
std::char_traits<char>::int_type get_character() override
{
return sb.sbumpc();
}
/// step the stream buffer back by one character
void unget_character() override
{
sb.sungetc(); // is.unget() avoided for performance
}
private:
/// the associated input stream
std::istream& is;
/// the stream buffer of @a is, used for the actual character I/O
std::streambuf& sb;
};
/*!
@brief input adapter for buffer input

Reads characters from the byte range [b, b + l). The adapter only keeps
pointers into that range and does not copy it.
*/
class input_buffer_adapter : public input_adapter_protocol
{
  public:
    /*!
    @param[in] b  pointer to the first byte of the buffer
    @param[in] l  number of bytes in the buffer
    */
    input_buffer_adapter(const char* b, const std::size_t l)
        : cursor(b), limit(b + l), start(b)
    {
        // skip byte order mark
        const bool starts_with_bom =
            (l >= 3) and (b[0] == '\xEF') and (b[1] == '\xBB') and (b[2] == '\xBF');
        if (starts_with_bom)
        {
            cursor += 3;
        }
    }

    // delete because of pointer members
    input_buffer_adapter(const input_buffer_adapter&) = delete;
    input_buffer_adapter& operator=(input_buffer_adapter&) = delete;

    /// return the character at the cursor and advance, or eof() at the limit
    std::char_traits<char>::int_type get_character() noexcept override
    {
        return JSON_LIKELY(cursor < limit)
               ? std::char_traits<char>::to_int_type(*(cursor++))
               : std::char_traits<char>::eof();
    }

    /// move the cursor back by one character unless it is at the buffer start
    void unget_character() noexcept override
    {
        if (JSON_LIKELY(start < cursor))
        {
            cursor -= 1;
        }
    }

  private:
    /// pointer to the current character
    const char* cursor;
    /// pointer past the last character
    const char* limit;
    /// pointer to the first character
    const char* start;
};
/*!
@brief front end that creates the matching input adapter for a given source

Each constructor wraps its argument in an input_stream_adapter or an
input_buffer_adapter; the conversion operator hands out the shared adapter.
*/
class input_adapter
{
  public:
    // native support

    /// input adapter for input stream
    input_adapter(std::istream& i)
        : ia(std::make_shared<input_stream_adapter>(i)) {}

    /// input adapter for input stream
    input_adapter(std::istream&& i)
        : ia(std::make_shared<input_stream_adapter>(i)) {}

    /// input adapter for buffer
    template<typename CharT,
             typename std::enable_if<
                 std::is_pointer<CharT>::value and
                 std::is_integral<typename std::remove_pointer<CharT>::type>::value and
                 sizeof(typename std::remove_pointer<CharT>::type) == 1,
                 int>::type = 0>
    input_adapter(CharT b, std::size_t l)
        : ia(std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(b), l)) {}

    // derived support

    /// input adapter for string literal
    template<typename CharT,
             typename std::enable_if<
                 std::is_pointer<CharT>::value and
                 std::is_integral<typename std::remove_pointer<CharT>::type>::value and
                 sizeof(typename std::remove_pointer<CharT>::type) == 1,
                 int>::type = 0>
    input_adapter(CharT b)
        : input_adapter(reinterpret_cast<const char*>(b),
                        std::strlen(reinterpret_cast<const char*>(b))) {}

    /// input adapter for iterator range with contiguous storage
    template<class IteratorType,
             typename std::enable_if<
                 std::is_same<typename std::iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
                 int>::type = 0>
    input_adapter(IteratorType first, IteratorType last)
    {
        // assertion to check that the iterator range is indeed contiguous,
        // see http://stackoverflow.com/a/35008842/266378 for more discussion:
        // the i-th element must equal what is stored i positions after the
        // address of the first element
        assert(std::accumulate(
                   first, last, std::pair<bool, int>(true, 0),
                   [&first](std::pair<bool, int> state, decltype(*first) element)
        {
            state.first &= (element == *(std::next(std::addressof(*first), state.second++)));
            return state;
        }).first);

        // assertion to check that each element is 1 byte long
        static_assert(
            sizeof(typename std::iterator_traits<IteratorType>::value_type) == 1,
            "each element in the iterator range must have the size of 1 byte");

        const auto len = static_cast<std::size_t>(std::distance(first, last));

        // with at least one element the address of first is used; for an
        // empty range that address cannot be used, so nullptr is passed
        const char* const data = JSON_LIKELY(len > 0)
                                 ? reinterpret_cast<const char*>(&(*first))
                                 : nullptr;
        ia = std::make_shared<input_buffer_adapter>(data, len);
    }

    /// input adapter for array
    template<class T, std::size_t N>
    input_adapter(T (&array)[N])
        : input_adapter(std::begin(array), std::end(array)) {}

    /// input adapter for contiguous container
    template<class ContiguousContainer, typename
             std::enable_if<not std::is_pointer<ContiguousContainer>::value and
                            std::is_base_of<std::random_access_iterator_tag, typename std::iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value,
                            int>::type = 0>
    input_adapter(const ContiguousContainer& c)
        : input_adapter(std::begin(c), std::end(c)) {}

    /// hand out the adapter that was created by the constructor
    operator input_adapter_t()
    {
        return ia;
    }

  private:
    /// the actual adapter
    input_adapter_t ia = nullptr;
};
}
}
#endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,116 @@
#ifndef NLOHMANN_JSON_DETAIL_PARSING_OUTPUT_ADAPTERS_HPP
#define NLOHMANN_JSON_DETAIL_PARSING_OUTPUT_ADAPTERS_HPP
#include <algorithm> // copy
#include <cstddef> // size_t
#include <ios> // streamsize
#include <iterator> // back_inserter
#include <memory> // shared_ptr, make_shared
#include <ostream> // basic_ostream
#include <string> // basic_string
#include <vector> // vector
namespace nlohmann
{
namespace detail
{
/*!
@brief abstract output adapter interface

@tparam CharType  character type of the sink
*/
template<typename CharType>
struct output_adapter_protocol
{
    virtual ~output_adapter_protocol() = default;

    /// write a single character
    virtual void write_character(CharType c) = 0;

    /// write @a length characters starting at @a s
    virtual void write_characters(const CharType* s, std::size_t length) = 0;
};

/// a type to simplify interfaces
template<typename CharType>
using output_adapter_t = std::shared_ptr<output_adapter_protocol<CharType>>;
/*!
@brief output adapter for byte vectors

Appends everything that is written to the vector passed to the constructor.
Only a reference to that vector is stored.
*/
template<typename CharType>
class output_vector_adapter : public output_adapter_protocol<CharType>
{
  public:
    /// @param[in,out] vec  vector that receives the output
    explicit output_vector_adapter(std::vector<CharType>& vec) : v(vec) {}

    /// append a single character
    void write_character(CharType c) override
    {
        v.push_back(c);
    }

    /// append @a length characters starting at @a s
    void write_characters(const CharType* s, std::size_t length) override
    {
        // one range insert lets the vector grow once for the whole run
        // instead of checking capacity for every appended element
        v.insert(v.end(), s, s + length);
    }

  private:
    /// the vector written to
    std::vector<CharType>& v;
};
/*!
@brief output adapter for output streams

Forwards everything that is written to the stream passed to the constructor.
Only a reference to that stream is stored.
*/
template<typename CharType>
class output_stream_adapter : public output_adapter_protocol<CharType>
{
  public:
    /// @param[in,out] s  stream that receives the output
    explicit output_stream_adapter(std::basic_ostream<CharType>& s) : stream(s) {}

    /// put a single character into the stream
    void write_character(CharType c) override
    {
        stream.put(c);
    }

    /// write @a length characters starting at @a s into the stream
    void write_characters(const CharType* s, std::size_t length) override
    {
        const auto count = static_cast<std::streamsize>(length);
        stream.write(s, count);
    }

  private:
    /// the stream written to
    std::basic_ostream<CharType>& stream;
};
/*!
@brief output adapter for basic_string

Appends everything that is written to the string passed to the constructor.
Only a reference to that string is stored.
*/
template<typename CharType>
class output_string_adapter : public output_adapter_protocol<CharType>
{
  public:
    /// @param[in,out] s  string that receives the output
    explicit output_string_adapter(std::basic_string<CharType>& s) : str(s) {}

    /// append a single character
    void write_character(CharType c) override
    {
        str.push_back(c);
    }

    /// append @a length characters starting at @a s
    void write_characters(const CharType* s, std::size_t length) override
    {
        str.append(s, length);
    }

  private:
    /// the string written to
    std::basic_string<CharType>& str;
};
/*!
@brief front end that creates the matching output adapter for a given sink

Each constructor wraps its argument in the corresponding adapter class; the
conversion operator hands out the shared adapter.

@tparam CharType  character type of the sink
*/
template<typename CharType>
class output_adapter
{
  public:
    /// output adapter writing to a vector
    output_adapter(std::vector<CharType>& vec)
        : oa(std::make_shared<output_vector_adapter<CharType>>(vec))
    {}

    /// output adapter writing to an output stream
    output_adapter(std::basic_ostream<CharType>& s)
        : oa(std::make_shared<output_stream_adapter<CharType>>(s))
    {}

    /// output adapter writing to a string
    output_adapter(std::basic_string<CharType>& s)
        : oa(std::make_shared<output_string_adapter<CharType>>(s))
    {}

    /// hand out the adapter that was created by the constructor
    operator output_adapter_t<CharType>()
    {
        return oa;
    }

  private:
    /// the actual adapter
    output_adapter_t<CharType> oa = nullptr;
};
}
}
#endif

View file

@ -0,0 +1,592 @@
#ifndef NLOHMANN_JSON_DETAIL_PARSING_PARSER_HPP
#define NLOHMANN_JSON_DETAIL_PARSING_PARSER_HPP
#include <cassert> // assert
#include <cmath> // isfinite
#include <cstdint> // uint8_t
#include <functional> // function
#include <string> // string
#include <utility> // move
#include "detail/exceptions.hpp"
#include "detail/macro_scope.hpp"
#include "detail/parsing/input_adapters.hpp"
#include "detail/parsing/lexer.hpp"
#include "detail/value_t.hpp"
namespace nlohmann
{
namespace detail
{
////////////
// parser //
////////////
/*!
@brief syntax analysis
This class implements a recursive descent parser.
*/
template<typename BasicJsonType>
class parser
{
using number_integer_t = typename BasicJsonType::number_integer_t;
using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
using number_float_t = typename BasicJsonType::number_float_t;
using lexer_t = lexer<BasicJsonType>;
using token_type = typename lexer_t::token_type;
public:
/// events for which the parser invokes the user callback; the event is passed
/// to the callback as its second argument
enum class parse_event_t : uint8_t
{
/// the parser read `{` and started to process a JSON object
object_start,
/// the parser read `}` and finished processing a JSON object
object_end,
/// the parser read `[` and started to process a JSON array
array_start,
/// the parser read `]` and finished processing a JSON array
array_end,
/// the parser read a key of a value in an object
key,
/// the parser finished reading a JSON value
value
};
/// signature of the user callback: it receives the current depth, the event
/// and the value the event refers to; the parser drops that value when the
/// callback returns false
using parser_callback_t =
std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>;
/*!
@brief create a parser reading from an input adapter

@param[in] adapter  input adapter handed to the lexer
@param[in] cb  optional callback invoked for parse events; may be empty
@param[in] allow_exceptions_  whether errors are reported by throwing
*/
explicit parser(detail::input_adapter_t adapter,
                const parser_callback_t cb = nullptr,
                const bool allow_exceptions_ = true)
    : callback(cb),
      m_lexer(adapter),
      allow_exceptions(allow_exceptions_)
{
}
/*!
@brief public parser interface

@param[in] strict      whether to expect the last token to be EOF
@param[in,out] result  parsed JSON value; set to value_t::discarded if an
                       error was recorded, and to null if the callback
                       discarded the top-level value

@throw parse_error.101 in case of an unexpected token
@throw parse_error.102 if to_unicode fails or surrogate error
@throw parse_error.103 if to_unicode fails
*/
void parse(const bool strict, BasicJsonType& result)
{
    // read the first token and parse the value that starts with it
    get_token();
    parse_internal(true, result);
    result.assert_invariant();

    // in strict mode, input must be completely read
    if (strict)
    {
        get_token();
        expect(token_type::end_of_input);
    }

    if (errored)
    {
        // in case of an error, return discarded value
        result = value_t::discarded;
    }
    else if (result.is_discarded())
    {
        // set top-level value to null if it was discarded by the callback
        // function
        result = nullptr;
    }
}
/*!
@brief public accept interface

@param[in] strict  whether to expect the last token to be EOF
@return whether the input is a proper JSON text
*/
bool accept(const bool strict = true)
{
    // read the first token and check the value that starts with it
    get_token();
    const bool value_accepted = accept_internal();
    if (not value_accepted)
    {
        return false;
    }

    // without strict mode, whatever follows the value is not looked at
    if (not strict)
    {
        return true;
    }

    // strict => last token must be EOF
    return get_token() == token_type::end_of_input;
}
private:
/*!
@brief the actual parser

Parses the value that starts at the current token into @a result and recurses
for the members of objects and the elements of arrays.

@param[in] keep  whether the value is still wanted; while it is false the
                 input is consumed, but no callback is invoked and nothing is
                 added to the enclosing object or array
@param[in,out] result  receives the parsed value; it is reset to
                       value_t::discarded first and ends up discarded again
                       if a callback rejects it

@note @a depth is incremented exactly when an object_start/array_start
      callback runs and is decremented again when that container is closed,
      whether or not the callback accepted the container.
@note A value rejected by the value callback has its storage released before
      it is marked as discarded.

@throw parse_error.101 in case of an unexpected token
@throw parse_error.102 if to_unicode fails or surrogate error
@throw parse_error.103 if to_unicode fails
@throw out_of_range.406 if a floating-point number is not finite (only when
       exceptions are allowed)
*/
void parse_internal(bool keep, BasicJsonType& result)
{
    // never parse after a parse error was detected
    assert(not errored);

    // start with a discarded value
    if (not result.is_discarded())
    {
        result.m_value.destroy(result.m_type);
        result.m_type = value_t::discarded;
    }

    switch (last_token)
    {
        case token_type::begin_object:
        {
            // whether the object_start callback ran and incremented depth
            bool depth_entered = false;

            if (keep)
            {
                if (callback)
                {
                    keep = callback(depth++, parse_event_t::object_start, result);
                    depth_entered = true;
                }

                if (not callback or keep)
                {
                    // explicitly set result to object to cope with {}
                    result.m_type = value_t::object;
                    result.m_value = value_t::object;
                }
            }

            // read next token
            get_token();

            // closing } -> we are done
            if (last_token == token_type::end_object)
            {
                // undo the increment even if the callback rejected the object
                if (depth_entered)
                {
                    --depth;
                }

                if (keep and callback and not callback(depth, parse_event_t::object_end, result))
                {
                    result.m_value.destroy(result.m_type);
                    result.m_type = value_t::discarded;
                }
                break;
            }

            // parse values
            std::string key;
            BasicJsonType value;
            while (true)
            {
                // store key
                if (not expect(token_type::value_string))
                {
                    return;
                }
                key = m_lexer.move_string();

                bool keep_tag = false;
                if (keep)
                {
                    if (callback)
                    {
                        BasicJsonType k(key);
                        keep_tag = callback(depth, parse_event_t::key, k);
                    }
                    else
                    {
                        keep_tag = true;
                    }
                }

                // parse separator (:)
                get_token();
                if (not expect(token_type::name_separator))
                {
                    return;
                }

                // parse and add value
                get_token();
                value.m_value.destroy(value.m_type);
                value.m_type = value_t::discarded;
                parse_internal(keep, value);

                if (JSON_UNLIKELY(errored))
                {
                    return;
                }

                if (keep and keep_tag and not value.is_discarded())
                {
                    result.m_value.object->emplace(std::move(key), std::move(value));
                }

                // comma -> next value
                get_token();
                if (last_token == token_type::value_separator)
                {
                    get_token();
                    continue;
                }

                // closing }
                if (not expect(token_type::end_object))
                {
                    return;
                }
                break;
            }

            // undo the increment even if the callback rejected the object
            if (depth_entered)
            {
                --depth;
            }

            if (keep and callback and not callback(depth, parse_event_t::object_end, result))
            {
                result.m_value.destroy(result.m_type);
                result.m_type = value_t::discarded;
            }
            break;
        }

        case token_type::begin_array:
        {
            // whether the array_start callback ran and incremented depth
            bool depth_entered = false;

            if (keep)
            {
                if (callback)
                {
                    keep = callback(depth++, parse_event_t::array_start, result);
                    depth_entered = true;
                }

                if (not callback or keep)
                {
                    // explicitly set result to array to cope with []
                    result.m_type = value_t::array;
                    result.m_value = value_t::array;
                }
            }

            // read next token
            get_token();

            // closing ] -> we are done
            if (last_token == token_type::end_array)
            {
                // undo the increment even if the callback rejected the array
                if (depth_entered)
                {
                    --depth;
                }

                // like every other end event, array_end is only reported
                // for arrays that are kept
                if (keep and callback and not callback(depth, parse_event_t::array_end, result))
                {
                    result.m_value.destroy(result.m_type);
                    result.m_type = value_t::discarded;
                }
                break;
            }

            // parse values
            BasicJsonType value;
            while (true)
            {
                // parse value
                value.m_value.destroy(value.m_type);
                value.m_type = value_t::discarded;
                parse_internal(keep, value);

                if (JSON_UNLIKELY(errored))
                {
                    return;
                }

                if (keep and not value.is_discarded())
                {
                    result.m_value.array->push_back(std::move(value));
                }

                // comma -> next value
                get_token();
                if (last_token == token_type::value_separator)
                {
                    get_token();
                    continue;
                }

                // closing ]
                if (not expect(token_type::end_array))
                {
                    return;
                }
                break;
            }

            // undo the increment even if the callback rejected the array
            if (depth_entered)
            {
                --depth;
            }

            if (keep and callback and not callback(depth, parse_event_t::array_end, result))
            {
                result.m_value.destroy(result.m_type);
                result.m_type = value_t::discarded;
            }
            break;
        }

        case token_type::literal_null:
        {
            result.m_type = value_t::null;
            break;
        }

        case token_type::value_string:
        {
            result.m_type = value_t::string;
            result.m_value = m_lexer.move_string();
            break;
        }

        case token_type::literal_true:
        {
            result.m_type = value_t::boolean;
            result.m_value = true;
            break;
        }

        case token_type::literal_false:
        {
            result.m_type = value_t::boolean;
            result.m_value = false;
            break;
        }

        case token_type::value_unsigned:
        {
            result.m_type = value_t::number_unsigned;
            result.m_value = m_lexer.get_number_unsigned();
            break;
        }

        case token_type::value_integer:
        {
            result.m_type = value_t::number_integer;
            result.m_value = m_lexer.get_number_integer();
            break;
        }

        case token_type::value_float:
        {
            result.m_type = value_t::number_float;
            result.m_value = m_lexer.get_number_float();

            // throw in case of infinity or NAN
            if (JSON_UNLIKELY(not std::isfinite(result.m_value.number_float)))
            {
                if (allow_exceptions)
                {
                    JSON_THROW(out_of_range::create(406, "number overflow parsing '" +
                                                    m_lexer.get_token_string() + "'"));
                }
                // without exceptions, record the error through expect()
                expect(token_type::uninitialized);
            }
            break;
        }

        case token_type::parse_error:
        {
            // using "uninitialized" to avoid "expected" message
            if (not expect(token_type::uninitialized))
            {
                return;
            }
            break; // LCOV_EXCL_LINE
        }

        default:
        {
            // the last token was unexpected; we expected a value
            if (not expect(token_type::literal_or_value))
            {
                return;
            }
            break; // LCOV_EXCL_LINE
        }
    }

    if (keep and callback and not callback(depth, parse_event_t::value, result))
    {
        // release the storage of a rejected string, array or object before
        // the type tag that identifies it is overwritten
        result.m_value.destroy(result.m_type);
        result.m_type = value_t::discarded;
    }
}
/*!
@brief the actual acceptor

Checks whether the tokens starting at the current one form a JSON value,
without building that value.

@invariant 1. The last token is not yet processed. Therefore, the caller
              of this function must make sure a token has been read.
           2. When this function returns, the last token is processed.
              That is, the last read character was already considered.
              This invariant makes sure that no token needs to be "unput".

@return whether a JSON value was recognized
*/
bool accept_internal()
{
    switch (last_token)
    {
        case token_type::begin_object:
        {
            // read next token; closing } -> we are done
            if (get_token() == token_type::end_object)
            {
                return true;
            }

            // parse members
            for (;;)
            {
                // parse key
                if (last_token != token_type::value_string)
                {
                    return false;
                }

                // parse separator (:)
                if (get_token() != token_type::name_separator)
                {
                    return false;
                }

                // parse value
                get_token();
                if (not accept_internal())
                {
                    return false;
                }

                // anything but a comma must be the closing }
                if (get_token() != token_type::value_separator)
                {
                    return (last_token == token_type::end_object);
                }

                // comma -> next member
                get_token();
            }
        }

        case token_type::begin_array:
        {
            // read next token; closing ] -> we are done
            if (get_token() == token_type::end_array)
            {
                return true;
            }

            // parse elements
            for (;;)
            {
                // parse value
                if (not accept_internal())
                {
                    return false;
                }

                // anything but a comma must be the closing ]
                if (get_token() != token_type::value_separator)
                {
                    return (last_token == token_type::end_array);
                }

                // comma -> next element
                get_token();
            }
        }

        case token_type::value_float:
        {
            // reject infinity or NAN
            return std::isfinite(m_lexer.get_number_float());
        }

        case token_type::literal_false:
        case token_type::literal_null:
        case token_type::literal_true:
        case token_type::value_integer:
        case token_type::value_string:
        case token_type::value_unsigned:
        {
            return true;
        }

        default:
        {
            // the last token was unexpected
            return false;
        }
    }
}
/// scan the next token with the lexer, remember it as the last read token,
/// and return it
token_type get_token()
{
    last_token = m_lexer.scan();
    return last_token;
}
/*!
@brief check that the last read token is the expected one

On a mismatch the parser is marked as errored and @a t is remembered as the
expected token.

@param[in] t  the token type that is required at this point
@return true if the last token equals @a t; false on a mismatch when
        exceptions are not allowed

@throw parse_error.101 if expected token did not occur
*/
bool expect(token_type t)
{
    const bool mismatch = (t != last_token);
    if (JSON_UNLIKELY(mismatch))
    {
        // record the failure before it is reported
        errored = true;
        expected = t;

        if (allow_exceptions)
        {
            throw_exception();
        }

        return false;
    }

    return true;
}
/*!
@brief build the syntax error message and throw it as parse_error 101

The message names either the lexer's error (for a parse_error token) or the
unexpected token, followed by the expected token if one was recorded.

@throw parse_error.101 always
*/
[[noreturn]] void throw_exception() const
{
    std::string message("syntax error - ");

    if (last_token != token_type::parse_error)
    {
        // the lexer produced a valid token, just not the one we needed
        message += "unexpected ";
        message += std::string(lexer_t::token_type_name(last_token));
    }
    else
    {
        // the lexer itself failed: report its diagnosis and the input read
        message += std::string(m_lexer.get_error_message());
        message += "; last read: '";
        message += m_lexer.get_token_string();
        message += "'";
    }

    if (expected != token_type::uninitialized)
    {
        message += "; expected ";
        message += std::string(lexer_t::token_type_name(expected));
    }

    JSON_THROW(parse_error::create(101, m_lexer.get_position(), message));
}
private:
/// current level of recursion; passed to the callback as its first argument
int depth = 0;
/// callback function; may be empty, in which case no events are reported
const parser_callback_t callback = nullptr;
/// the type of the last read token (updated by get_token)
token_type last_token = token_type::uninitialized;
/// the lexer
lexer_t m_lexer;
/// whether a syntax error occurred (set by expect)
bool errored = false;
/// possible reason for the syntax error: the token expect() asked for
token_type expected = token_type::uninitialized;
/// whether to throw exceptions in case of errors
const bool allow_exceptions = true;
};
}
}
#endif