minor changes
- fixed a bug that did not discard strings with control characters between 0x10 and 0x1f - added termination proofs for two important loops - made get_ref() constexpr
This commit is contained in:
parent
4c98c971b8
commit
4e7501e59a
4 changed files with 248 additions and 53 deletions
BIN
doc/images/callback_events.png
Normal file
BIN
doc/images/callback_events.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 45 KiB |
125
src/json.hpp
125
src/json.hpp
|
@ -717,7 +717,19 @@ class basic_json
|
||||||
This enumeration collects the different JSON types. It is internally used
|
This enumeration collects the different JSON types. It is internally used
|
||||||
to distinguish the stored values, and the functions @ref is_null(), @ref
|
to distinguish the stored values, and the functions @ref is_null(), @ref
|
||||||
is_object(), @ref is_array(), @ref is_string(), @ref is_boolean(), @ref
|
is_object(), @ref is_array(), @ref is_string(), @ref is_boolean(), @ref
|
||||||
is_number(), and @ref is_discarded() rely on it.
|
is_number() (with @ref is_number_integer(), @ref is_number_unsigned(), and
|
||||||
|
@ref is_number_float()), @ref is_discarded(), @ref is_primitive(), and
|
||||||
|
@ref is_structured() rely on it.
|
||||||
|
|
||||||
|
@note There are three enumeration entries (number_integer,
|
||||||
|
number_unsigned, and number_float), because the library distinguishes
|
||||||
|
these three types for numbers: @ref number_unsigned_t is used for unsigned
|
||||||
|
integers, @ref number_integer_t is used for signed integers, and @ref
|
||||||
|
number_float_t is used for floating-point numbers or to approximate
|
||||||
|
integers which do not fit in the limits of their respective type.
|
||||||
|
|
||||||
|
@sa @ref basic_json(const value_t value_type) -- create a JSON value with
|
||||||
|
the default value for a given type
|
||||||
|
|
||||||
@since version 1.0.0
|
@since version 1.0.0
|
||||||
*/
|
*/
|
||||||
|
@ -728,7 +740,7 @@ class basic_json
|
||||||
array, ///< array (ordered collection of values)
|
array, ///< array (ordered collection of values)
|
||||||
string, ///< string value
|
string, ///< string value
|
||||||
boolean, ///< boolean value
|
boolean, ///< boolean value
|
||||||
number_integer, ///< number value (integer)
|
number_integer, ///< number value (signed integer)
|
||||||
number_unsigned, ///< number value (unsigned integer)
|
number_unsigned, ///< number value (unsigned integer)
|
||||||
number_float, ///< number value (floating-point)
|
number_float, ///< number value (floating-point)
|
||||||
discarded ///< discarded by the parser callback function
|
discarded ///< discarded by the parser callback function
|
||||||
|
@ -758,7 +770,24 @@ class basic_json
|
||||||
/*!
|
/*!
|
||||||
@brief a JSON value
|
@brief a JSON value
|
||||||
|
|
||||||
The actual storage for a JSON value of the @ref basic_json class.
|
The actual storage for a JSON value of the @ref basic_json class. This
|
||||||
|
union combines the different storage types for the JSON value types
|
||||||
|
defined in @ref value_t.
|
||||||
|
|
||||||
|
JSON type | value_t type | used type
|
||||||
|
--------- | --------------- | ------------------------
|
||||||
|
object | object | pointer to @ref object_t
|
||||||
|
array | array | pointer to @ref array_t
|
||||||
|
string | string | pointer to @ref string_t
|
||||||
|
boolean | boolean | @ref boolean_t
|
||||||
|
number | number_integer | @ref number_integer_t
|
||||||
|
number | number_unsigned | @ref number_unsigned_t
|
||||||
|
number | number_float | @ref number_float_t
|
||||||
|
null | null | *no value is stored*
|
||||||
|
|
||||||
|
@note Variable-length types (objects, arrays, and strings) are stored as
|
||||||
|
pointers. The size of the union should not exceed 64 bits if the default
|
||||||
|
value types are used.
|
||||||
|
|
||||||
@since version 1.0.0
|
@since version 1.0.0
|
||||||
*/
|
*/
|
||||||
|
@ -874,6 +903,8 @@ class basic_json
|
||||||
This enumeration lists the parser events that can trigger calling a
|
This enumeration lists the parser events that can trigger calling a
|
||||||
callback function of type @ref parser_callback_t during parsing.
|
callback function of type @ref parser_callback_t during parsing.
|
||||||
|
|
||||||
|
@image html callback_events.png "Example when certain parse events are triggered"
|
||||||
|
|
||||||
@since version 1.0.0
|
@since version 1.0.0
|
||||||
*/
|
*/
|
||||||
enum class parse_event_t : uint8_t
|
enum class parse_event_t : uint8_t
|
||||||
|
@ -916,6 +947,8 @@ class basic_json
|
||||||
parse_event_t::array_end | the parser read `]` and finished processing a JSON array | depth of the parent of the JSON array | the parsed JSON array
|
parse_event_t::array_end | the parser read `]` and finished processing a JSON array | depth of the parent of the JSON array | the parsed JSON array
|
||||||
parse_event_t::value | the parser finished reading a JSON value | depth of the value | the parsed JSON value
|
parse_event_t::value | the parser finished reading a JSON value | depth of the value | the parsed JSON value
|
||||||
|
|
||||||
|
@image html callback_events.png "Example when certain parse events are triggered"
|
||||||
|
|
||||||
Discarding a value (i.e., returning `false`) has different effects
|
Discarding a value (i.e., returning `false`) has different effects
|
||||||
depending on the context in which the function was called:
|
depending on the context in which the function was called:
|
||||||
|
|
||||||
|
@ -2773,21 +2806,16 @@ class basic_json
|
||||||
type of the current JSON
|
type of the current JSON
|
||||||
*/
|
*/
|
||||||
template<typename ReferenceType, typename ThisType>
|
template<typename ReferenceType, typename ThisType>
|
||||||
static ReferenceType get_ref_impl(ThisType& obj)
|
static constexpr ReferenceType get_ref_impl(ThisType& obj)
|
||||||
{
|
{
|
||||||
// delegate the call to get_ptr<>()
|
// helper type
|
||||||
using PointerType = typename std::add_pointer<ReferenceType>::type;
|
using PointerType = typename std::add_pointer<ReferenceType>::type;
|
||||||
auto ptr = obj.template get_ptr<PointerType>();
|
|
||||||
|
|
||||||
if (ptr != nullptr)
|
// delegate the call to get_ptr<>()
|
||||||
{
|
return obj.template get_ptr<PointerType>() != nullptr
|
||||||
return *ptr;
|
? *obj.template get_ptr<PointerType>()
|
||||||
}
|
: throw std::domain_error("incompatible ReferenceType for get_ref, actual type is " +
|
||||||
else
|
obj.type_name());
|
||||||
{
|
|
||||||
throw std::domain_error("incompatible ReferenceType for get_ref, actual type is " +
|
|
||||||
obj.type_name());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -3015,7 +3043,7 @@ class basic_json
|
||||||
std::is_reference<ReferenceType>::value
|
std::is_reference<ReferenceType>::value
|
||||||
and std::is_const<typename std::remove_reference<ReferenceType>::type>::value
|
and std::is_const<typename std::remove_reference<ReferenceType>::type>::value
|
||||||
, int>::type = 0>
|
, int>::type = 0>
|
||||||
ReferenceType get_ref() const
|
constexpr ReferenceType get_ref() const
|
||||||
{
|
{
|
||||||
// delegate call to get_ref_impl
|
// delegate call to get_ref_impl
|
||||||
return get_ref_impl<ReferenceType>(*this);
|
return get_ref_impl<ReferenceType>(*this);
|
||||||
|
@ -7286,6 +7314,8 @@ class basic_json
|
||||||
@throw std::invalid_argument if the low surrogate is invalid; example:
|
@throw std::invalid_argument if the low surrogate is invalid; example:
|
||||||
`""missing or wrong low surrogate""`
|
`""missing or wrong low surrogate""`
|
||||||
|
|
||||||
|
@complexity Constant.
|
||||||
|
|
||||||
@see <http://en.wikipedia.org/wiki/UTF-8#Sample_code>
|
@see <http://en.wikipedia.org/wiki/UTF-8#Sample_code>
|
||||||
*/
|
*/
|
||||||
static string_t to_unicode(const std::size_t codepoint1,
|
static string_t to_unicode(const std::size_t codepoint1,
|
||||||
|
@ -7402,6 +7432,17 @@ class basic_json
|
||||||
function consists of a large block of code with `goto` jumps.
|
function consists of a large block of code with `goto` jumps.
|
||||||
|
|
||||||
@return the class of the next token read from the buffer
|
@return the class of the next token read from the buffer
|
||||||
|
|
||||||
|
@complexity Linear in the length of the input.\n
|
||||||
|
|
||||||
|
Proposition: The loop below will always terminate for finite input.\n
|
||||||
|
|
||||||
|
Proof (by contradiction): Assume a finite input. To loop forever, the
|
||||||
|
loop must never hit code with a `break` statement. The only code
|
||||||
|
snippets without a `break` statement are the continue statements for
|
||||||
|
whitespace and byte-order-marks. To loop forever, the input must be an
|
||||||
|
infinite sequence of whitespace or byte-order-marks. This contradicts
|
||||||
|
the assumption of finite input, q.e.d.
|
||||||
*/
|
*/
|
||||||
token_type scan() noexcept
|
token_type scan() noexcept
|
||||||
{
|
{
|
||||||
|
@ -7422,8 +7463,8 @@ class basic_json
|
||||||
{
|
{
|
||||||
0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 32, 32, 0, 0, 32, 0, 0,
|
0, 32, 32, 0, 0, 32, 0, 0,
|
||||||
128, 128, 128, 128, 128, 128, 128, 128,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
128, 128, 128, 128, 128, 128, 128, 128,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
160, 128, 0, 128, 128, 128, 128, 128,
|
160, 128, 0, 128, 128, 128, 128, 128,
|
||||||
128, 128, 128, 128, 128, 128, 128, 128,
|
128, 128, 128, 128, 128, 128, 128, 128,
|
||||||
192, 192, 192, 192, 192, 192, 192, 192,
|
192, 192, 192, 192, 192, 192, 192, 192,
|
||||||
|
@ -7602,7 +7643,7 @@ basic_json_parser_6:
|
||||||
basic_json_parser_9:
|
basic_json_parser_9:
|
||||||
yyaccept = 0;
|
yyaccept = 0;
|
||||||
yych = *(m_marker = ++m_cursor);
|
yych = *(m_marker = ++m_cursor);
|
||||||
if (yych <= 0x0F)
|
if (yych <= 0x1F)
|
||||||
{
|
{
|
||||||
goto basic_json_parser_5;
|
goto basic_json_parser_5;
|
||||||
}
|
}
|
||||||
|
@ -7760,7 +7801,7 @@ basic_json_parser_32:
|
||||||
{
|
{
|
||||||
goto basic_json_parser_31;
|
goto basic_json_parser_31;
|
||||||
}
|
}
|
||||||
if (yych <= 0x0F)
|
if (yych <= 0x1F)
|
||||||
{
|
{
|
||||||
goto basic_json_parser_33;
|
goto basic_json_parser_33;
|
||||||
}
|
}
|
||||||
|
@ -8233,16 +8274,53 @@ basic_json_parser_63:
|
||||||
according to the nature of the escape. Some escapes create new
|
according to the nature of the escape. Some escapes create new
|
||||||
characters (e.g., `"\\n"` is replaced by `"\n"`), some are copied
|
characters (e.g., `"\\n"` is replaced by `"\n"`), some are copied
|
||||||
as is (e.g., `"\\\\"`). Furthermore, Unicode escapes of the shape
|
as is (e.g., `"\\\\"`). Furthermore, Unicode escapes of the shape
|
||||||
`"\\uxxxx"` need special care. In this case, to_unicode takes care
|
`"\\uxxxx"` need special care. In this case, @ref to_unicode takes
|
||||||
of the construction of the values.
|
care of the construction of the values.
|
||||||
2. Unescaped characters are copied as is.
|
2. Unescaped characters are copied as is.
|
||||||
|
|
||||||
|
@pre `m_cursor - m_start >= 2`, meaning the length of the last token
|
||||||
|
is at least 2 bytes which is trivially true for any string (which
|
||||||
|
consists of at least two quotes).
|
||||||
|
|
||||||
|
" c1 c2 c3 ... "
|
||||||
|
^ ^
|
||||||
|
m_start m_cursor
|
||||||
|
|
||||||
|
@complexity Linear in the length of the string.\n
|
||||||
|
|
||||||
|
Lemma: The loop body will always terminate.\n
|
||||||
|
|
||||||
|
Proof (by contradiction): Assume the loop body does not terminate. As
|
||||||
|
the loop body does not contain another loop, one of the called
|
||||||
|
functions must never return. The called functions are `std::strtoul`
|
||||||
|
and @ref to_unicode. Neither function can loop forever, so the loop
|
||||||
|
body will never loop forever which contradicts the assumption that the
|
||||||
|
loop body does not terminate, q.e.d.\n
|
||||||
|
|
||||||
|
Lemma: The loop condition for the for loop is eventually false.\n
|
||||||
|
|
||||||
|
Proof (by contradiction): Assume the loop does not terminate. Due to
|
||||||
|
the above lemma, this can only be due to a tautological loop
|
||||||
|
condition; that is, the loop condition i < m_cursor - 1 must always be
|
||||||
|
true. Let x be the change of i for any loop iteration. Then
|
||||||
|
m_start + 1 + x < m_cursor - 1 must hold to loop indefinitely.
|
||||||
|
This can be rephrased to m_cursor - m_start - 2 > x. With the
|
||||||
|
precondition, we have x <= 0, meaning that the loop condition holds
|
||||||
|
indefinitely if i is always decreased. However, observe that the
|
||||||
|
value of i is strictly increasing with each iteration, as it is
|
||||||
|
incremented by 1 in the iteration expression and never
|
||||||
|
decremented inside the loop body. Hence, the loop condition
|
||||||
|
will eventually be false which contradicts the assumption that
|
||||||
|
the loop condition is a tautology, q.e.d.
|
||||||
|
|
||||||
@return string value of current token without opening and closing
|
@return string value of current token without opening and closing
|
||||||
quotes
|
quotes
|
||||||
@throw std::out_of_range if to_unicode fails
|
@throw std::out_of_range if to_unicode fails
|
||||||
*/
|
*/
|
||||||
string_t get_string() const
|
string_t get_string() const
|
||||||
{
|
{
|
||||||
|
assert(m_cursor - m_start >= 2);
|
||||||
|
|
||||||
string_t result;
|
string_t result;
|
||||||
result.reserve(static_cast<size_t>(m_cursor - m_start - 2));
|
result.reserve(static_cast<size_t>(m_cursor - m_start - 2));
|
||||||
|
|
||||||
|
@ -8915,6 +8993,8 @@ basic_json_parser_63:
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
@brief create and return a reference to the pointed to value
|
@brief create and return a reference to the pointed to value
|
||||||
|
|
||||||
|
@complexity Linear in the number of reference tokens.
|
||||||
*/
|
*/
|
||||||
reference get_and_create(reference j) const
|
reference get_and_create(reference j) const
|
||||||
{
|
{
|
||||||
|
@ -9352,6 +9432,7 @@ basic_json_parser_63:
|
||||||
basic_json result;
|
basic_json result;
|
||||||
|
|
||||||
// iterate the JSON object values
|
// iterate the JSON object values
|
||||||
|
assert(value.m_value.object != nullptr);
|
||||||
for (const auto& element : *value.m_value.object)
|
for (const auto& element : *value.m_value.object)
|
||||||
{
|
{
|
||||||
if (not element.second.is_primitive())
|
if (not element.second.is_primitive())
|
||||||
|
|
|
@ -717,7 +717,19 @@ class basic_json
|
||||||
This enumeration collects the different JSON types. It is internally used
|
This enumeration collects the different JSON types. It is internally used
|
||||||
to distinguish the stored values, and the functions @ref is_null(), @ref
|
to distinguish the stored values, and the functions @ref is_null(), @ref
|
||||||
is_object(), @ref is_array(), @ref is_string(), @ref is_boolean(), @ref
|
is_object(), @ref is_array(), @ref is_string(), @ref is_boolean(), @ref
|
||||||
is_number(), and @ref is_discarded() rely on it.
|
is_number() (with @ref is_number_integer(), @ref is_number_unsigned(), and
|
||||||
|
@ref is_number_float()), @ref is_discarded(), @ref is_primitive(), and
|
||||||
|
@ref is_structured() rely on it.
|
||||||
|
|
||||||
|
@note There are three enumeration entries (number_integer,
|
||||||
|
number_unsigned, and number_float), because the library distinguishes
|
||||||
|
these three types for numbers: @ref number_unsigned_t is used for unsigned
|
||||||
|
integers, @ref number_integer_t is used for signed integers, and @ref
|
||||||
|
number_float_t is used for floating-point numbers or to approximate
|
||||||
|
integers which do not fit in the limits of their respective type.
|
||||||
|
|
||||||
|
@sa @ref basic_json(const value_t value_type) -- create a JSON value with
|
||||||
|
the default value for a given type
|
||||||
|
|
||||||
@since version 1.0.0
|
@since version 1.0.0
|
||||||
*/
|
*/
|
||||||
|
@ -728,7 +740,7 @@ class basic_json
|
||||||
array, ///< array (ordered collection of values)
|
array, ///< array (ordered collection of values)
|
||||||
string, ///< string value
|
string, ///< string value
|
||||||
boolean, ///< boolean value
|
boolean, ///< boolean value
|
||||||
number_integer, ///< number value (integer)
|
number_integer, ///< number value (signed integer)
|
||||||
number_unsigned, ///< number value (unsigned integer)
|
number_unsigned, ///< number value (unsigned integer)
|
||||||
number_float, ///< number value (floating-point)
|
number_float, ///< number value (floating-point)
|
||||||
discarded ///< discarded by the parser callback function
|
discarded ///< discarded by the parser callback function
|
||||||
|
@ -758,7 +770,24 @@ class basic_json
|
||||||
/*!
|
/*!
|
||||||
@brief a JSON value
|
@brief a JSON value
|
||||||
|
|
||||||
The actual storage for a JSON value of the @ref basic_json class.
|
The actual storage for a JSON value of the @ref basic_json class. This
|
||||||
|
union combines the different storage types for the JSON value types
|
||||||
|
defined in @ref value_t.
|
||||||
|
|
||||||
|
JSON type | value_t type | used type
|
||||||
|
--------- | --------------- | ------------------------
|
||||||
|
object | object | pointer to @ref object_t
|
||||||
|
array | array | pointer to @ref array_t
|
||||||
|
string | string | pointer to @ref string_t
|
||||||
|
boolean | boolean | @ref boolean_t
|
||||||
|
number | number_integer | @ref number_integer_t
|
||||||
|
number | number_unsigned | @ref number_unsigned_t
|
||||||
|
number | number_float | @ref number_float_t
|
||||||
|
null | null | *no value is stored*
|
||||||
|
|
||||||
|
@note Variable-length types (objects, arrays, and strings) are stored as
|
||||||
|
pointers. The size of the union should not exceed 64 bits if the default
|
||||||
|
value types are used.
|
||||||
|
|
||||||
@since version 1.0.0
|
@since version 1.0.0
|
||||||
*/
|
*/
|
||||||
|
@ -874,6 +903,8 @@ class basic_json
|
||||||
This enumeration lists the parser events that can trigger calling a
|
This enumeration lists the parser events that can trigger calling a
|
||||||
callback function of type @ref parser_callback_t during parsing.
|
callback function of type @ref parser_callback_t during parsing.
|
||||||
|
|
||||||
|
@image html callback_events.png "Example when certain parse events are triggered"
|
||||||
|
|
||||||
@since version 1.0.0
|
@since version 1.0.0
|
||||||
*/
|
*/
|
||||||
enum class parse_event_t : uint8_t
|
enum class parse_event_t : uint8_t
|
||||||
|
@ -916,6 +947,8 @@ class basic_json
|
||||||
parse_event_t::array_end | the parser read `]` and finished processing a JSON array | depth of the parent of the JSON array | the parsed JSON array
|
parse_event_t::array_end | the parser read `]` and finished processing a JSON array | depth of the parent of the JSON array | the parsed JSON array
|
||||||
parse_event_t::value | the parser finished reading a JSON value | depth of the value | the parsed JSON value
|
parse_event_t::value | the parser finished reading a JSON value | depth of the value | the parsed JSON value
|
||||||
|
|
||||||
|
@image html callback_events.png "Example when certain parse events are triggered"
|
||||||
|
|
||||||
Discarding a value (i.e., returning `false`) has different effects
|
Discarding a value (i.e., returning `false`) has different effects
|
||||||
depending on the context in which the function was called:
|
depending on the context in which the function was called:
|
||||||
|
|
||||||
|
@ -2773,21 +2806,16 @@ class basic_json
|
||||||
type of the current JSON
|
type of the current JSON
|
||||||
*/
|
*/
|
||||||
template<typename ReferenceType, typename ThisType>
|
template<typename ReferenceType, typename ThisType>
|
||||||
static ReferenceType get_ref_impl(ThisType& obj)
|
static constexpr ReferenceType get_ref_impl(ThisType& obj)
|
||||||
{
|
{
|
||||||
// delegate the call to get_ptr<>()
|
// helper type
|
||||||
using PointerType = typename std::add_pointer<ReferenceType>::type;
|
using PointerType = typename std::add_pointer<ReferenceType>::type;
|
||||||
auto ptr = obj.template get_ptr<PointerType>();
|
|
||||||
|
|
||||||
if (ptr != nullptr)
|
// delegate the call to get_ptr<>()
|
||||||
{
|
return obj.template get_ptr<PointerType>() != nullptr
|
||||||
return *ptr;
|
? *obj.template get_ptr<PointerType>()
|
||||||
}
|
: throw std::domain_error("incompatible ReferenceType for get_ref, actual type is " +
|
||||||
else
|
obj.type_name());
|
||||||
{
|
|
||||||
throw std::domain_error("incompatible ReferenceType for get_ref, actual type is " +
|
|
||||||
obj.type_name());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -3015,7 +3043,7 @@ class basic_json
|
||||||
std::is_reference<ReferenceType>::value
|
std::is_reference<ReferenceType>::value
|
||||||
and std::is_const<typename std::remove_reference<ReferenceType>::type>::value
|
and std::is_const<typename std::remove_reference<ReferenceType>::type>::value
|
||||||
, int>::type = 0>
|
, int>::type = 0>
|
||||||
ReferenceType get_ref() const
|
constexpr ReferenceType get_ref() const
|
||||||
{
|
{
|
||||||
// delegate call to get_ref_impl
|
// delegate call to get_ref_impl
|
||||||
return get_ref_impl<ReferenceType>(*this);
|
return get_ref_impl<ReferenceType>(*this);
|
||||||
|
@ -7286,6 +7314,8 @@ class basic_json
|
||||||
@throw std::invalid_argument if the low surrogate is invalid; example:
|
@throw std::invalid_argument if the low surrogate is invalid; example:
|
||||||
`""missing or wrong low surrogate""`
|
`""missing or wrong low surrogate""`
|
||||||
|
|
||||||
|
@complexity Constant.
|
||||||
|
|
||||||
@see <http://en.wikipedia.org/wiki/UTF-8#Sample_code>
|
@see <http://en.wikipedia.org/wiki/UTF-8#Sample_code>
|
||||||
*/
|
*/
|
||||||
static string_t to_unicode(const std::size_t codepoint1,
|
static string_t to_unicode(const std::size_t codepoint1,
|
||||||
|
@ -7402,6 +7432,17 @@ class basic_json
|
||||||
function consists of a large block of code with `goto` jumps.
|
function consists of a large block of code with `goto` jumps.
|
||||||
|
|
||||||
@return the class of the next token read from the buffer
|
@return the class of the next token read from the buffer
|
||||||
|
|
||||||
|
@complexity Linear in the length of the input.\n
|
||||||
|
|
||||||
|
Proposition: The loop below will always terminate for finite input.\n
|
||||||
|
|
||||||
|
Proof (by contradiction): Assume a finite input. To loop forever, the
|
||||||
|
loop must never hit code with a `break` statement. The only code
|
||||||
|
snippets without a `break` statement are the continue statements for
|
||||||
|
whitespace and byte-order-marks. To loop forever, the input must be an
|
||||||
|
infinite sequence of whitespace or byte-order-marks. This contradicts
|
||||||
|
the assumption of finite input, q.e.d.
|
||||||
*/
|
*/
|
||||||
token_type scan() noexcept
|
token_type scan() noexcept
|
||||||
{
|
{
|
||||||
|
@ -7447,32 +7488,32 @@ class basic_json
|
||||||
"false" { last_token_type = token_type::literal_false; break; }
|
"false" { last_token_type = token_type::literal_false; break; }
|
||||||
|
|
||||||
// number
|
// number
|
||||||
decimal_point = [.];
|
decimal_point = ".";
|
||||||
digit = [0-9];
|
digit = [0-9];
|
||||||
digit_1_9 = [1-9];
|
digit_1_9 = [1-9];
|
||||||
e = [eE];
|
e = "e" | "E";
|
||||||
minus = [-];
|
minus = "-";
|
||||||
plus = [+];
|
plus = "+";
|
||||||
zero = [0];
|
zero = "0";
|
||||||
exp = e (minus|plus)? digit+;
|
exp = e (minus | plus)? digit+;
|
||||||
frac = decimal_point digit+;
|
frac = decimal_point digit+;
|
||||||
int = (zero|digit_1_9 digit*);
|
int = (zero | digit_1_9 digit*);
|
||||||
number = minus? int frac? exp?;
|
number = minus? int frac? exp?;
|
||||||
number { last_token_type = token_type::value_number; break; }
|
number { last_token_type = token_type::value_number; break; }
|
||||||
|
|
||||||
// string
|
// string
|
||||||
quotation_mark = ["];
|
quotation_mark = "\"";
|
||||||
escape = [\\];
|
escape = "\\";
|
||||||
unescaped = [^"\\\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F];
|
unescaped = [^"\\\x00-\x1f];
|
||||||
single_escaped = ["\\/bfnrt];
|
single_escaped = "\"" | "\\" | "/" | "b" | "f" | "n" | "r" | "t";
|
||||||
unicode_escaped = [u][0-9a-fA-F]{4};
|
unicode_escaped = "u" [0-9a-fA-F]{4};
|
||||||
escaped = escape (single_escaped | unicode_escaped);
|
escaped = escape (single_escaped | unicode_escaped);
|
||||||
char = unescaped | escaped;
|
char = unescaped | escaped;
|
||||||
string = quotation_mark char* quotation_mark;
|
string = quotation_mark char* quotation_mark;
|
||||||
string { last_token_type = token_type::value_string; break; }
|
string { last_token_type = token_type::value_string; break; }
|
||||||
|
|
||||||
// end of file
|
// end of file
|
||||||
'\000' { last_token_type = token_type::end_of_input; break; }
|
"\000" { last_token_type = token_type::end_of_input; break; }
|
||||||
|
|
||||||
// anything else is an error
|
// anything else is an error
|
||||||
. { last_token_type = token_type::parse_error; break; }
|
. { last_token_type = token_type::parse_error; break; }
|
||||||
|
@ -7530,16 +7571,53 @@ class basic_json
|
||||||
according to the nature of the escape. Some escapes create new
|
according to the nature of the escape. Some escapes create new
|
||||||
characters (e.g., `"\\n"` is replaced by `"\n"`), some are copied
|
characters (e.g., `"\\n"` is replaced by `"\n"`), some are copied
|
||||||
as is (e.g., `"\\\\"`). Furthermore, Unicode escapes of the shape
|
as is (e.g., `"\\\\"`). Furthermore, Unicode escapes of the shape
|
||||||
`"\\uxxxx"` need special care. In this case, to_unicode takes care
|
`"\\uxxxx"` need special care. In this case, @ref to_unicode takes
|
||||||
of the construction of the values.
|
care of the construction of the values.
|
||||||
2. Unescaped characters are copied as is.
|
2. Unescaped characters are copied as is.
|
||||||
|
|
||||||
|
@pre `m_cursor - m_start >= 2`, meaning the length of the last token
|
||||||
|
is at least 2 bytes which is trivially true for any string (which
|
||||||
|
consists of at least two quotes).
|
||||||
|
|
||||||
|
" c1 c2 c3 ... "
|
||||||
|
^ ^
|
||||||
|
m_start m_cursor
|
||||||
|
|
||||||
|
@complexity Linear in the length of the string.\n
|
||||||
|
|
||||||
|
Lemma: The loop body will always terminate.\n
|
||||||
|
|
||||||
|
Proof (by contradiction): Assume the loop body does not terminate. As
|
||||||
|
the loop body does not contain another loop, one of the called
|
||||||
|
functions must never return. The called functions are `std::strtoul`
|
||||||
|
and @ref to_unicode. Neither function can loop forever, so the loop
|
||||||
|
body will never loop forever which contradicts the assumption that the
|
||||||
|
loop body does not terminate, q.e.d.\n
|
||||||
|
|
||||||
|
Lemma: The loop condition for the for loop is eventually false.\n
|
||||||
|
|
||||||
|
Proof (by contradiction): Assume the loop does not terminate. Due to
|
||||||
|
the above lemma, this can only be due to a tautological loop
|
||||||
|
condition; that is, the loop condition i < m_cursor - 1 must always be
|
||||||
|
true. Let x be the change of i for any loop iteration. Then
|
||||||
|
m_start + 1 + x < m_cursor - 1 must hold to loop indefinitely.
|
||||||
|
This can be rephrased to m_cursor - m_start - 2 > x. With the
|
||||||
|
precondition, we have x <= 0, meaning that the loop condition holds
|
||||||
|
indefinitely if i is always decreased. However, observe that the
|
||||||
|
value of i is strictly increasing with each iteration, as it is
|
||||||
|
incremented by 1 in the iteration expression and never
|
||||||
|
decremented inside the loop body. Hence, the loop condition
|
||||||
|
will eventually be false which contradicts the assumption that
|
||||||
|
the loop condition is a tautology, q.e.d.
|
||||||
|
|
||||||
@return string value of current token without opening and closing
|
@return string value of current token without opening and closing
|
||||||
quotes
|
quotes
|
||||||
@throw std::out_of_range if to_unicode fails
|
@throw std::out_of_range if to_unicode fails
|
||||||
*/
|
*/
|
||||||
string_t get_string() const
|
string_t get_string() const
|
||||||
{
|
{
|
||||||
|
assert(m_cursor - m_start >= 2);
|
||||||
|
|
||||||
string_t result;
|
string_t result;
|
||||||
result.reserve(static_cast<size_t>(m_cursor - m_start - 2));
|
result.reserve(static_cast<size_t>(m_cursor - m_start - 2));
|
||||||
|
|
||||||
|
@ -8212,6 +8290,8 @@ class basic_json
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
@brief create and return a reference to the pointed to value
|
@brief create and return a reference to the pointed to value
|
||||||
|
|
||||||
|
@complexity Linear in the number of reference tokens.
|
||||||
*/
|
*/
|
||||||
reference get_and_create(reference j) const
|
reference get_and_create(reference j) const
|
||||||
{
|
{
|
||||||
|
@ -8649,6 +8729,7 @@ class basic_json
|
||||||
basic_json result;
|
basic_json result;
|
||||||
|
|
||||||
// iterate the JSON object values
|
// iterate the JSON object values
|
||||||
|
assert(value.m_value.object != nullptr);
|
||||||
for (const auto& element : *value.m_value.object)
|
for (const auto& element : *value.m_value.object)
|
||||||
{
|
{
|
||||||
if (not element.second.is_primitive())
|
if (not element.second.is_primitive())
|
||||||
|
|
|
@ -9716,6 +9716,39 @@ TEST_CASE("parser class")
|
||||||
CHECK_THROWS_WITH(json::parser("\"\b\"").parse(), "parse error - unexpected '\"'");
|
CHECK_THROWS_WITH(json::parser("\"\b\"").parse(), "parse error - unexpected '\"'");
|
||||||
// improve code coverage
|
// improve code coverage
|
||||||
CHECK_THROWS_AS(json::parser("\uFF01").parse(), std::invalid_argument);
|
CHECK_THROWS_AS(json::parser("\uFF01").parse(), std::invalid_argument);
|
||||||
|
// unescaped control characters
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x00\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x01\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x02\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x03\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x04\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x05\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x06\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x07\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x08\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x09\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x0a\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x0b\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x0c\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x0d\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x0e\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x0f\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x10\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x11\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x12\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x13\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x14\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x15\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x16\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x17\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x18\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x19\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x1a\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x1b\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x1c\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x1d\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x1e\"").parse(), std::invalid_argument);
|
||||||
|
CHECK_THROWS_AS(json::parser("\"\x1f\"").parse(), std::invalid_argument);
|
||||||
}
|
}
|
||||||
|
|
||||||
SECTION("escaped")
|
SECTION("escaped")
|
||||||
|
|
Loading…
Reference in a new issue