minor changes

- fixed a bug where strings containing unescaped control characters
between 0x10 and 0x1f were not rejected by the parser
- added termination proofs for two important loops
- made get_ref() constexpr
This commit is contained in:
Niels 2016-07-22 15:34:45 +02:00
parent 4c98c971b8
commit 4e7501e59a
4 changed files with 248 additions and 53 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

View file

@ -717,7 +717,19 @@ class basic_json
This enumeration collects the different JSON types. It is internally used
to distinguish the stored values, and the functions @ref is_null(), @ref
is_object(), @ref is_array(), @ref is_string(), @ref is_boolean(), @ref
is_number(), and @ref is_discarded() rely on it.
is_number() (with @ref is_number_integer(), @ref is_number_unsigned(), and
@ref is_number_float()), @ref is_discarded(), @ref is_primitive(), and
@ref is_structured() rely on it.
@note There are three enumeration entries (number_integer,
number_unsigned, and number_float), because the library distinguishes
these three types for numbers: @ref number_unsigned_t is used for unsigned
integers, @ref number_integer_t is used for signed integers, and @ref
number_float_t is used for floating-point numbers or to approximate
integers which do not fit in the limits of their respective type.
@sa @ref basic_json(const value_t value_type) -- create a JSON value with
the default value for a given type
@since version 1.0.0
*/
@ -728,7 +740,7 @@ class basic_json
array, ///< array (ordered collection of values)
string, ///< string value
boolean, ///< boolean value
number_integer, ///< number value (integer)
number_integer, ///< number value (signed integer)
number_unsigned, ///< number value (unsigned integer)
number_float, ///< number value (floating-point)
discarded ///< discarded by the parser callback function
@ -758,7 +770,24 @@ class basic_json
/*!
@brief a JSON value
The actual storage for a JSON value of the @ref basic_json class.
The actual storage for a JSON value of the @ref basic_json class. This
union combines the different storage types for the JSON value types
defined in @ref value_t.
JSON type | value_t type | used type
--------- | --------------- | ------------------------
object | object | pointer to @ref object_t
array | array | pointer to @ref array_t
string | string | pointer to @ref string_t
boolean | boolean | @ref boolean_t
number | number_integer | @ref number_integer_t
number | number_unsigned | @ref number_unsigned_t
number | number_float | @ref number_float_t
null | null | *no value is stored*
@note Variable-length types (objects, arrays, and strings) are stored as
pointers. The size of the union should not exceed 64 bits if the default
value types are used.
@since version 1.0.0
*/
@ -874,6 +903,8 @@ class basic_json
This enumeration lists the parser events that can trigger calling a
callback function of type @ref parser_callback_t during parsing.
@image html callback_events.png "Example when certain parse events are triggered"
@since version 1.0.0
*/
enum class parse_event_t : uint8_t
@ -916,6 +947,8 @@ class basic_json
parse_event_t::array_end | the parser read `]` and finished processing a JSON array | depth of the parent of the JSON array | the parsed JSON array
parse_event_t::value | the parser finished reading a JSON value | depth of the value | the parsed JSON value
@image html callback_events.png "Example when certain parse events are triggered"
Discarding a value (i.e., returning `false`) has different effects
depending on the context in which the function was called:
@ -2773,21 +2806,16 @@ class basic_json
type of the current JSON
*/
template<typename ReferenceType, typename ThisType>
static ReferenceType get_ref_impl(ThisType& obj)
static constexpr ReferenceType get_ref_impl(ThisType& obj)
{
// delegate the call to get_ptr<>()
// helper type
using PointerType = typename std::add_pointer<ReferenceType>::type;
auto ptr = obj.template get_ptr<PointerType>();
if (ptr != nullptr)
{
return *ptr;
}
else
{
throw std::domain_error("incompatible ReferenceType for get_ref, actual type is " +
obj.type_name());
}
// delegate the call to get_ptr<>()
return obj.template get_ptr<PointerType>() != nullptr
? *obj.template get_ptr<PointerType>()
: throw std::domain_error("incompatible ReferenceType for get_ref, actual type is " +
obj.type_name());
}
public:
@ -3015,7 +3043,7 @@ class basic_json
std::is_reference<ReferenceType>::value
and std::is_const<typename std::remove_reference<ReferenceType>::type>::value
, int>::type = 0>
ReferenceType get_ref() const
constexpr ReferenceType get_ref() const
{
// delegate call to get_ref_impl
return get_ref_impl<ReferenceType>(*this);
@ -7286,6 +7314,8 @@ class basic_json
@throw std::invalid_argument if the low surrogate is invalid; example:
`"missing or wrong low surrogate"`
@complexity Constant.
@see <http://en.wikipedia.org/wiki/UTF-8#Sample_code>
*/
static string_t to_unicode(const std::size_t codepoint1,
@ -7402,6 +7432,17 @@ class basic_json
function consists of a large block of code with `goto` jumps.
@return the class of the next token read from the buffer
@complexity Linear in the length of the input.\n
Proposition: The loop below will always terminate for finite input.\n
Proof (by contradiction): Assume a finite input. To loop forever, the
loop must never hit code with a `break` statement. The only code
snippets without a `break` statement are the continue statements for
whitespace and byte-order-marks. To loop forever, the input must be an
infinite sequence of whitespace or byte-order-marks. This contradicts
the assumption of finite input, q.e.d.
*/
token_type scan() noexcept
{
@ -7422,8 +7463,8 @@ class basic_json
{
0, 0, 0, 0, 0, 0, 0, 0,
0, 32, 32, 0, 0, 32, 0, 0,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
160, 128, 0, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
192, 192, 192, 192, 192, 192, 192, 192,
@ -7602,7 +7643,7 @@ basic_json_parser_6:
basic_json_parser_9:
yyaccept = 0;
yych = *(m_marker = ++m_cursor);
if (yych <= 0x0F)
if (yych <= 0x1F)
{
goto basic_json_parser_5;
}
@ -7760,7 +7801,7 @@ basic_json_parser_32:
{
goto basic_json_parser_31;
}
if (yych <= 0x0F)
if (yych <= 0x1F)
{
goto basic_json_parser_33;
}
@ -8233,16 +8274,53 @@ basic_json_parser_63:
according to the nature of the escape. Some escapes create new
characters (e.g., `"\\n"` is replaced by `"\n"`), some are copied
as is (e.g., `"\\\\"`). Furthermore, Unicode escapes of the shape
`"\\uxxxx"` need special care. In this case, to_unicode takes care
of the construction of the values.
`"\\uxxxx"` need special care. In this case, @ref to_unicode takes
care of the construction of the values.
2. Unescaped characters are copied as is.
@pre `m_cursor - m_start >= 2`, meaning the length of the last token
is at least 2 bytes which is trivially true for any string (which
consists of at least two quotes).
" c1 c2 c3 ... "
^ ^
m_start m_cursor
@complexity Linear in the length of the string.\n
Lemma: The loop body will always terminate.\n
Proof (by contradiction): Assume the loop body does not terminate. As
the loop body does not contain another loop, one of the called
functions must never return. The called functions are `std::strtoul`
and @ref to_unicode. Neither function can loop forever, so the loop
body will never loop forever which contradicts the assumption that the
loop body does not terminate, q.e.d.\n
Lemma: The loop condition for the for loop is eventually false.\n
Proof (by contradiction): Assume the loop does not terminate. Due to
the above lemma, this can only be due to a tautological loop
condition; that is, the loop condition i < m_cursor - 1 must always be
true. Let x be the change of i for any loop iteration. Then
m_start + 1 + x < m_cursor - 1 must hold to loop indefinitely.
This can be rephrased to m_cursor - m_start - 2 > x. With the
precondition, we have x <= 0, meaning that the loop condition holds
indefinitely if i is always decreased. However, observe that the
value of i is strictly increasing with each iteration, as it is
incremented by 1 in the iteration expression and never
decremented inside the loop body. Hence, the loop condition
will eventually be false which contradicts the assumption that
the loop condition is a tautology, q.e.d.
@return string value of current token without opening and closing
quotes
@throw std::out_of_range if to_unicode fails
*/
string_t get_string() const
{
assert(m_cursor - m_start >= 2);
string_t result;
result.reserve(static_cast<size_t>(m_cursor - m_start - 2));
@ -8915,6 +8993,8 @@ basic_json_parser_63:
/*!
@brief create and return a reference to the pointed to value
@complexity Linear in the number of reference tokens.
*/
reference get_and_create(reference j) const
{
@ -9352,6 +9432,7 @@ basic_json_parser_63:
basic_json result;
// iterate the JSON object values
assert(value.m_value.object != nullptr);
for (const auto& element : *value.m_value.object)
{
if (not element.second.is_primitive())

View file

@ -717,7 +717,19 @@ class basic_json
This enumeration collects the different JSON types. It is internally used
to distinguish the stored values, and the functions @ref is_null(), @ref
is_object(), @ref is_array(), @ref is_string(), @ref is_boolean(), @ref
is_number(), and @ref is_discarded() rely on it.
is_number() (with @ref is_number_integer(), @ref is_number_unsigned(), and
@ref is_number_float()), @ref is_discarded(), @ref is_primitive(), and
@ref is_structured() rely on it.
@note There are three enumeration entries (number_integer,
number_unsigned, and number_float), because the library distinguishes
these three types for numbers: @ref number_unsigned_t is used for unsigned
integers, @ref number_integer_t is used for signed integers, and @ref
number_float_t is used for floating-point numbers or to approximate
integers which do not fit in the limits of their respective type.
@sa @ref basic_json(const value_t value_type) -- create a JSON value with
the default value for a given type
@since version 1.0.0
*/
@ -728,7 +740,7 @@ class basic_json
array, ///< array (ordered collection of values)
string, ///< string value
boolean, ///< boolean value
number_integer, ///< number value (integer)
number_integer, ///< number value (signed integer)
number_unsigned, ///< number value (unsigned integer)
number_float, ///< number value (floating-point)
discarded ///< discarded by the parser callback function
@ -758,7 +770,24 @@ class basic_json
/*!
@brief a JSON value
The actual storage for a JSON value of the @ref basic_json class.
The actual storage for a JSON value of the @ref basic_json class. This
union combines the different storage types for the JSON value types
defined in @ref value_t.
JSON type | value_t type | used type
--------- | --------------- | ------------------------
object | object | pointer to @ref object_t
array | array | pointer to @ref array_t
string | string | pointer to @ref string_t
boolean | boolean | @ref boolean_t
number | number_integer | @ref number_integer_t
number | number_unsigned | @ref number_unsigned_t
number | number_float | @ref number_float_t
null | null | *no value is stored*
@note Variable-length types (objects, arrays, and strings) are stored as
pointers. The size of the union should not exceed 64 bits if the default
value types are used.
@since version 1.0.0
*/
@ -874,6 +903,8 @@ class basic_json
This enumeration lists the parser events that can trigger calling a
callback function of type @ref parser_callback_t during parsing.
@image html callback_events.png "Example when certain parse events are triggered"
@since version 1.0.0
*/
enum class parse_event_t : uint8_t
@ -916,6 +947,8 @@ class basic_json
parse_event_t::array_end | the parser read `]` and finished processing a JSON array | depth of the parent of the JSON array | the parsed JSON array
parse_event_t::value | the parser finished reading a JSON value | depth of the value | the parsed JSON value
@image html callback_events.png "Example when certain parse events are triggered"
Discarding a value (i.e., returning `false`) has different effects
depending on the context in which the function was called:
@ -2773,21 +2806,16 @@ class basic_json
type of the current JSON
*/
template<typename ReferenceType, typename ThisType>
static ReferenceType get_ref_impl(ThisType& obj)
static constexpr ReferenceType get_ref_impl(ThisType& obj)
{
// delegate the call to get_ptr<>()
// helper type
using PointerType = typename std::add_pointer<ReferenceType>::type;
auto ptr = obj.template get_ptr<PointerType>();
if (ptr != nullptr)
{
return *ptr;
}
else
{
throw std::domain_error("incompatible ReferenceType for get_ref, actual type is " +
obj.type_name());
}
// delegate the call to get_ptr<>()
return obj.template get_ptr<PointerType>() != nullptr
? *obj.template get_ptr<PointerType>()
: throw std::domain_error("incompatible ReferenceType for get_ref, actual type is " +
obj.type_name());
}
public:
@ -3015,7 +3043,7 @@ class basic_json
std::is_reference<ReferenceType>::value
and std::is_const<typename std::remove_reference<ReferenceType>::type>::value
, int>::type = 0>
ReferenceType get_ref() const
constexpr ReferenceType get_ref() const
{
// delegate call to get_ref_impl
return get_ref_impl<ReferenceType>(*this);
@ -7286,6 +7314,8 @@ class basic_json
@throw std::invalid_argument if the low surrogate is invalid; example:
`"missing or wrong low surrogate"`
@complexity Constant.
@see <http://en.wikipedia.org/wiki/UTF-8#Sample_code>
*/
static string_t to_unicode(const std::size_t codepoint1,
@ -7402,6 +7432,17 @@ class basic_json
function consists of a large block of code with `goto` jumps.
@return the class of the next token read from the buffer
@complexity Linear in the length of the input.\n
Proposition: The loop below will always terminate for finite input.\n
Proof (by contradiction): Assume a finite input. To loop forever, the
loop must never hit code with a `break` statement. The only code
snippets without a `break` statement are the continue statements for
whitespace and byte-order-marks. To loop forever, the input must be an
infinite sequence of whitespace or byte-order-marks. This contradicts
the assumption of finite input, q.e.d.
*/
token_type scan() noexcept
{
@ -7447,32 +7488,32 @@ class basic_json
"false" { last_token_type = token_type::literal_false; break; }
// number
decimal_point = [.];
decimal_point = ".";
digit = [0-9];
digit_1_9 = [1-9];
e = [eE];
minus = [-];
plus = [+];
zero = [0];
exp = e (minus|plus)? digit+;
e = "e" | "E";
minus = "-";
plus = "+";
zero = "0";
exp = e (minus | plus)? digit+;
frac = decimal_point digit+;
int = (zero|digit_1_9 digit*);
int = (zero | digit_1_9 digit*);
number = minus? int frac? exp?;
number { last_token_type = token_type::value_number; break; }
// string
quotation_mark = ["];
escape = [\\];
unescaped = [^"\\\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F];
single_escaped = ["\\/bfnrt];
unicode_escaped = [u][0-9a-fA-F]{4};
quotation_mark = "\"";
escape = "\\";
unescaped = [^"\\\x00-\x1f];
single_escaped = "\"" | "\\" | "/" | "b" | "f" | "n" | "r" | "t";
unicode_escaped = "u" [0-9a-fA-F]{4};
escaped = escape (single_escaped | unicode_escaped);
char = unescaped | escaped;
string = quotation_mark char* quotation_mark;
string { last_token_type = token_type::value_string; break; }
// end of file
'\000' { last_token_type = token_type::end_of_input; break; }
"\000" { last_token_type = token_type::end_of_input; break; }
// anything else is an error
. { last_token_type = token_type::parse_error; break; }
@ -7530,16 +7571,53 @@ class basic_json
according to the nature of the escape. Some escapes create new
characters (e.g., `"\\n"` is replaced by `"\n"`), some are copied
as is (e.g., `"\\\\"`). Furthermore, Unicode escapes of the shape
`"\\uxxxx"` need special care. In this case, to_unicode takes care
of the construction of the values.
`"\\uxxxx"` need special care. In this case, @ref to_unicode takes
care of the construction of the values.
2. Unescaped characters are copied as is.
@pre `m_cursor - m_start >= 2`, meaning the length of the last token
is at least 2 bytes which is trivially true for any string (which
consists of at least two quotes).
" c1 c2 c3 ... "
^ ^
m_start m_cursor
@complexity Linear in the length of the string.\n
Lemma: The loop body will always terminate.\n
Proof (by contradiction): Assume the loop body does not terminate. As
the loop body does not contain another loop, one of the called
functions must never return. The called functions are `std::strtoul`
and @ref to_unicode. Neither function can loop forever, so the loop
body will never loop forever which contradicts the assumption that the
loop body does not terminate, q.e.d.\n
Lemma: The loop condition for the for loop is eventually false.\n
Proof (by contradiction): Assume the loop does not terminate. Due to
the above lemma, this can only be due to a tautological loop
condition; that is, the loop condition i < m_cursor - 1 must always be
true. Let x be the change of i for any loop iteration. Then
m_start + 1 + x < m_cursor - 1 must hold to loop indefinitely.
This can be rephrased to m_cursor - m_start - 2 > x. With the
precondition, we have x <= 0, meaning that the loop condition holds
indefinitely if i is always decreased. However, observe that the
value of i is strictly increasing with each iteration, as it is
incremented by 1 in the iteration expression and never
decremented inside the loop body. Hence, the loop condition
will eventually be false which contradicts the assumption that
the loop condition is a tautology, q.e.d.
@return string value of current token without opening and closing
quotes
@throw std::out_of_range if to_unicode fails
*/
string_t get_string() const
{
assert(m_cursor - m_start >= 2);
string_t result;
result.reserve(static_cast<size_t>(m_cursor - m_start - 2));
@ -8212,6 +8290,8 @@ class basic_json
/*!
@brief create and return a reference to the pointed to value
@complexity Linear in the number of reference tokens.
*/
reference get_and_create(reference j) const
{
@ -8649,6 +8729,7 @@ class basic_json
basic_json result;
// iterate the JSON object values
assert(value.m_value.object != nullptr);
for (const auto& element : *value.m_value.object)
{
if (not element.second.is_primitive())

View file

@ -9716,6 +9716,39 @@ TEST_CASE("parser class")
CHECK_THROWS_WITH(json::parser("\"\b\"").parse(), "parse error - unexpected '\"'");
// improve code coverage
CHECK_THROWS_AS(json::parser("\uFF01").parse(), std::invalid_argument);
// unescaped control characters
CHECK_THROWS_AS(json::parser("\"\x00\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x01\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x02\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x03\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x04\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x05\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x06\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x07\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x08\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x09\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x0a\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x0b\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x0c\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x0d\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x0e\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x0f\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x10\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x11\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x12\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x13\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x14\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x15\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x16\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x17\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x18\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x19\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x1a\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x1b\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x1c\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x1d\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x1e\"").parse(), std::invalid_argument);
CHECK_THROWS_AS(json::parser("\"\x1f\"").parse(), std::invalid_argument);
}
SECTION("escaped")