Merge branch 'feature/manual_lexer' into develop

This commit is contained in:
Niels Lohmann 2017-05-05 18:27:56 +02:00
commit 56ac7908f1
No known key found for this signature in database
GPG key ID: 7F3CEA63AE251B69
14 changed files with 5887 additions and 17227 deletions

3
.gitignore vendored
View file

@ -30,3 +30,6 @@ test/parse_afl_fuzzer
test/parse_cbor_fuzzer
test/parse_msgpack_fuzzer
minibench

View file

@ -1,9 +1,5 @@
.PHONY: pretty clean ChangeLog.md
# used programs
RE2C := $(shell command -v re2c 2> /dev/null)
SED = sed
# main target
all:
$(MAKE) -C test
@ -51,7 +47,8 @@ doctest:
# -Wno-keyword-macro: unit-tests use "#define private public"
# -Wno-deprecated-declarations: the library deprecated some functions
# -Wno-weak-vtables: exception class is defined inline, but has virtual method
# -Wno-range-loop-analysis: iterator_wrapper tests tests "for(const auto i...)"
# -Wno-range-loop-analysis: iterator_wrapper tests "for(const auto i...)"
# -Wno-float-equal: not all comparisons in the tests can be replaced by Approx
pedantic_clang:
$(MAKE) json_unit CXXFLAGS="\
-std=c++11 \
@ -62,7 +59,8 @@ pedantic_clang:
-Wno-keyword-macro \
-Wno-deprecated-declarations \
-Wno-weak-vtables \
-Wno-range-loop-analysis"
-Wno-range-loop-analysis \
-Wno-float-equal"
# calling GCC with most warnings
pedantic_gcc:
@ -186,13 +184,6 @@ clang_sanitize: clean
# maintainer targets
##########################################################################
# create scanner with re2c
re2c: src/json.hpp.re2c
ifndef RE2C
$(error "re2c is not available, please install re2c")
endif
$(RE2C) -W --utf-8 --encoding-policy fail --bit-vectors --nested-ifs --no-debug-info $< | $(SED) '1d' > src/json.hpp
# pretty printer
pretty:
astyle --style=allman --indent=spaces=4 --indent-modifiers \
@ -200,7 +191,7 @@ pretty:
--indent-col1-comments --pad-oper --pad-header --align-pointer=type \
--align-reference=type --add-brackets --convert-tabs --close-templates \
--lineend=linux --preserve-date --suffix=none --formatted \
src/json.hpp src/json.hpp.re2c test/src/*.cpp \
src/json.hpp test/src/*.cpp \
benchmarks/benchmarks.cpp doc/examples/*.cpp

View file

@ -899,7 +899,7 @@ $ make json_unit -Ctest
$ ./test/json_unit "*"
===============================================================================
All tests passed (11203022 assertions in 48 test cases)
All tests passed (13391115 assertions in 49 test cases)
```
Alternatively, you can use [CMake](https://cmake.org) and run

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -28,7 +28,6 @@ SOFTWARE.
#include "catch.hpp"
#define private public
#include "json.hpp"
using nlohmann::json;
@ -728,14 +727,9 @@ TEST_CASE("CBOR")
const auto result = json::to_cbor(j);
CHECK(result == expected);
// restore value (reverse array for endianess)
double restored;
std::reverse(expected.begin(), expected.end());
memcpy(&restored, expected.data(), sizeof(double));
CHECK(restored == v);
// roundtrip
CHECK(json::from_cbor(result) == j);
CHECK(json::from_cbor(result) == v);
}
}
@ -1166,35 +1160,35 @@ TEST_CASE("CBOR")
CHECK_THROWS_AS(json::from_cbor(std::vector<uint8_t>({0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x18})),
"[json.exception.parse_error.110] parse error at 2: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x19})),
"[json.exception.parse_error.110] parse error at 2: cannot read 2 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x19, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 2 bytes from vector");
"[json.exception.parse_error.110] parse error at 3: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1a})),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1a, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 3: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1a, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 4: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1a, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 5: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1b})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1b, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 3: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1b, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 4: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1b, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 5: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1b, 0x00, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 6: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1b, 0x00, 0x00, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 7: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 8: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 9: unexpected end of input");
}
SECTION("unsupported bytes")
@ -1357,12 +1351,6 @@ TEST_CASE("CBOR regressions", "[!throws]")
}
}
}
SECTION("improve code coverage")
{
// exotic edge case
CHECK_THROWS_AS(json::check_length(0xffffffffffffffffull, 0xfffffffffffffff0ull, 0xff), json::parse_error);
}
}
TEST_CASE("CBOR roundtrips", "[hide]")
@ -1756,7 +1744,7 @@ TEST_CASE("examples from RFC 7049 Appendix A")
CHECK(json::parse("\"\\ud800\\udd51\"") == json::from_cbor(std::vector<uint8_t>({0x64, 0xf0, 0x90, 0x85, 0x91})));
// indefinite length strings
CHECK(json::parse("\"streaming\"") == json::from_cbor(std::vector<uint8_t>({0x7f, 0x65, 0x73, 0x74, 0x72, 0x65, 0x61, 0x64, 0x6d, 0x69, 0x6e, 0x67, 0xff})));
CHECK(json::parse("\"streaming\"") == json::from_cbor(std::vector<uint8_t>({0x7f, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0xff})));
}
SECTION("arrays")

View file

@ -32,106 +32,84 @@ SOFTWARE.
#include "json.hpp"
using nlohmann::json;
// Test helper: feed the C string s to a freshly constructed lexer and return
// the type of the first token that a single scan() call produces.
json::lexer::token_type scan_string(const char* s);
json::lexer::token_type scan_string(const char* s)
{
    // wrap the string in an input adapter and hand it to a named lexer
    json::lexer string_lexer(json::input_adapter::create(s));
    return string_lexer.scan();
}
TEST_CASE("lexer class")
{
SECTION("scan")
{
SECTION("structural characters")
{
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("["),
1).scan() == json::lexer::token_type::begin_array));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("]"),
1).scan() == json::lexer::token_type::end_array));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("{"),
1).scan() == json::lexer::token_type::begin_object));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("}"),
1).scan() == json::lexer::token_type::end_object));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>(","),
1).scan() == json::lexer::token_type::value_separator));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>(":"),
1).scan() == json::lexer::token_type::name_separator));
CHECK((scan_string("[") == json::lexer::token_type::begin_array));
CHECK((scan_string("]") == json::lexer::token_type::end_array));
CHECK((scan_string("{") == json::lexer::token_type::begin_object));
CHECK((scan_string("}") == json::lexer::token_type::end_object));
CHECK((scan_string(",") == json::lexer::token_type::value_separator));
CHECK((scan_string(":") == json::lexer::token_type::name_separator));
}
SECTION("literal names")
{
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("null"),
4).scan() == json::lexer::token_type::literal_null));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("true"),
4).scan() == json::lexer::token_type::literal_true));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("false"),
5).scan() == json::lexer::token_type::literal_false));
CHECK((scan_string("null") == json::lexer::token_type::literal_null));
CHECK((scan_string("true") == json::lexer::token_type::literal_true));
CHECK((scan_string("false") == json::lexer::token_type::literal_false));
}
SECTION("numbers")
{
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("0"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("1"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("2"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("3"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("4"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("5"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("6"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("7"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("8"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("9"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((scan_string("0") == json::lexer::token_type::value_unsigned));
CHECK((scan_string("1") == json::lexer::token_type::value_unsigned));
CHECK((scan_string("2") == json::lexer::token_type::value_unsigned));
CHECK((scan_string("3") == json::lexer::token_type::value_unsigned));
CHECK((scan_string("4") == json::lexer::token_type::value_unsigned));
CHECK((scan_string("5") == json::lexer::token_type::value_unsigned));
CHECK((scan_string("6") == json::lexer::token_type::value_unsigned));
CHECK((scan_string("7") == json::lexer::token_type::value_unsigned));
CHECK((scan_string("8") == json::lexer::token_type::value_unsigned));
CHECK((scan_string("9") == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("-0"),
2).scan() == json::lexer::token_type::value_integer));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("-1"),
2).scan() == json::lexer::token_type::value_integer));
CHECK((scan_string("-0") == json::lexer::token_type::value_integer));
CHECK((scan_string("-1") == json::lexer::token_type::value_integer));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("1.1"),
3).scan() == json::lexer::token_type::value_float));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("-1.1"),
4).scan() == json::lexer::token_type::value_float));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("1E10"),
4).scan() == json::lexer::token_type::value_float));
CHECK((scan_string("1.1") == json::lexer::token_type::value_float));
CHECK((scan_string("-1.1") == json::lexer::token_type::value_float));
CHECK((scan_string("1E10") == json::lexer::token_type::value_float));
}
SECTION("whitespace")
{
// result is end_of_input, because no token is following
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>(" "),
1).scan() == json::lexer::token_type::end_of_input));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("\t"),
1).scan() == json::lexer::token_type::end_of_input));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("\n"),
1).scan() == json::lexer::token_type::end_of_input));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("\r"),
1).scan() == json::lexer::token_type::end_of_input));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>(" \t\n\r\n\t "),
7).scan() == json::lexer::token_type::end_of_input));
CHECK((scan_string(" ") == json::lexer::token_type::end_of_input));
CHECK((scan_string("\t") == json::lexer::token_type::end_of_input));
CHECK((scan_string("\n") == json::lexer::token_type::end_of_input));
CHECK((scan_string("\r") == json::lexer::token_type::end_of_input));
CHECK((scan_string(" \t\n\r\n\t ") == json::lexer::token_type::end_of_input));
}
}
SECTION("token_type_name")
{
CHECK((json::lexer::token_type_name(json::lexer::token_type::uninitialized) == "<uninitialized>"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::literal_true) == "true literal"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::literal_false) == "false literal"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::literal_null) == "null literal"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::value_string) == "string literal"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::value_unsigned) == "number literal"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::value_integer) == "number literal"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::value_float) == "number literal"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::begin_array) == "'['"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::begin_object) == "'{'"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::end_array) == "']'"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::end_object) == "'}'"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::name_separator) == "':'"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::value_separator) == "','"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::parse_error) == "<parse error>"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::end_of_input) == "end of input"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::uninitialized)) == "<uninitialized>"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::literal_true)) == "true literal"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::literal_false)) == "false literal"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::literal_null)) == "null literal"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_string)) == "string literal"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_unsigned)) == "number literal"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_integer)) == "number literal"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_float)) == "number literal"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::begin_array)) == "'['"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::begin_object)) == "'{'"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::end_array)) == "']'"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::end_object)) == "'}'"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::name_separator)) == "':'"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_separator)) == "','"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::parse_error)) == "<parse error>"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::end_of_input)) == "end of input"));
}
SECTION("parse errors on first character")
@ -141,8 +119,7 @@ TEST_CASE("lexer class")
// create string from the ASCII code
const auto s = std::string(1, static_cast<char>(c));
// store scan() result
const auto res = json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>(s.c_str()),
1).scan();
const auto res = scan_string(s.c_str());
switch (c)
{
@ -188,12 +165,23 @@ TEST_CASE("lexer class")
}
}
SECTION("very large string")
{
// strings larger than 1024 bytes yield a resize of the lexer's yytext buffer
std::string s("\"");
s += std::string(2048, 'x');
s += "\"";
CHECK((scan_string(s.c_str()) == json::lexer::token_type::value_string));
}
/* NOTE: to_unicode function has been removed
SECTION("to_unicode")
{
// lexer to call to_unicode on
json::lexer dummy_lexer(reinterpret_cast<const json::lexer::lexer_char_t*>(""), 0);
json::lexer dummy_lexer("", 0);
CHECK(dummy_lexer.to_unicode(0x1F4A9) == "💩");
CHECK_THROWS_AS(dummy_lexer.to_unicode(0x200000), json::parse_error);
CHECK_THROWS_WITH(dummy_lexer.to_unicode(0x200000), "[json.exception.parse_error.103] parse error: code points above 0x10FFFF are invalid");
}
*/
}

File diff suppressed because it is too large Load diff

View file

@ -53,7 +53,7 @@ TEST_CASE("convenience functions")
const char* escaped)
{
std::stringstream ss;
json::serializer s(ss);
json::serializer s(json::output_adapter<char>::create(ss));
s.dump_escaped(original);
CHECK(ss.str() == escaped);
};

View file

@ -92,7 +92,7 @@ TEST_CASE("deserialization")
ss2 << "[\"foo\",1,2,3,false,{\"one\":1}";
CHECK_THROWS_AS(json::parse(ss1), json::parse_error);
CHECK_THROWS_WITH(json::parse(ss2),
"[json.exception.parse_error.101] parse error at 30: parse error - unexpected end of input; expected ']'");
"[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
}
SECTION("string")
@ -100,7 +100,7 @@ TEST_CASE("deserialization")
json::string_t s = "[\"foo\",1,2,3,false,{\"one\":1}";
CHECK_THROWS_AS(json::parse(s), json::parse_error);
CHECK_THROWS_WITH(json::parse(s),
"[json.exception.parse_error.101] parse error at 29: parse error - unexpected end of input; expected ']'");
"[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
}
SECTION("operator<<")
@ -111,7 +111,7 @@ TEST_CASE("deserialization")
json j;
CHECK_THROWS_AS(j << ss1, json::parse_error);
CHECK_THROWS_WITH(j << ss2,
"[json.exception.parse_error.101] parse error at 30: parse error - unexpected end of input; expected ']'");
"[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
}
SECTION("operator>>")
@ -122,14 +122,14 @@ TEST_CASE("deserialization")
json j;
CHECK_THROWS_AS(ss1 >> j, json::parse_error);
CHECK_THROWS_WITH(ss2 >> j,
"[json.exception.parse_error.101] parse error at 30: parse error - unexpected end of input; expected ']'");
"[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
}
SECTION("user-defined string literal")
{
CHECK_THROWS_AS("[\"foo\",1,2,3,false,{\"one\":1}"_json, json::parse_error);
CHECK_THROWS_WITH("[\"foo\",1,2,3,false,{\"one\":1}"_json,
"[json.exception.parse_error.101] parse error at 29: parse error - unexpected end of input; expected ']'");
"[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
}
}

View file

@ -676,14 +676,9 @@ TEST_CASE("MessagePack")
const auto result = json::to_msgpack(j);
CHECK(result == expected);
// restore value (reverse array for endianess)
double restored;
std::reverse(expected.begin(), expected.end());
memcpy(&restored, expected.data(), sizeof(double));
CHECK(restored == v);
// roundtrip
CHECK(json::from_msgpack(result) == j);
CHECK(json::from_msgpack(result) == v);
}
}
}
@ -1038,35 +1033,35 @@ TEST_CASE("MessagePack")
CHECK_THROWS_AS(json::from_msgpack(std::vector<uint8_t>({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})), json::parse_error);
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcc})),
"[json.exception.parse_error.110] parse error at 2: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcd})),
"[json.exception.parse_error.110] parse error at 2: cannot read 2 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcd, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 2 bytes from vector");
"[json.exception.parse_error.110] parse error at 3: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xce})),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xce, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 3: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xce, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 4: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xce, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 5: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcf})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcf, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 3: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcf, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 4: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcf, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 5: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcf, 0x00, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 6: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 7: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 8: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 9: unexpected end of input");
}
SECTION("unsupported bytes")

View file

@ -596,7 +596,7 @@ TEST_CASE("regression tests")
// a parse error because of the EOF.
CHECK_THROWS_AS(ss >> j, json::parse_error);
CHECK_THROWS_WITH(ss >> j,
"[json.exception.parse_error.101] parse error at 1: parse error - unexpected end of input");
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input");
}
SECTION("issue #389 - Integer-overflow (OSS-Fuzz issue 267)")
@ -629,7 +629,7 @@ TEST_CASE("regression tests")
std::vector<uint8_t> vec {0x65, 0xf5, 0x0a, 0x48, 0x21};
CHECK_THROWS_AS(json::from_cbor(vec), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec),
"[json.exception.parse_error.110] parse error at 2: cannot read 5 bytes from vector");
"[json.exception.parse_error.110] parse error at 6: unexpected end of input");
}
SECTION("issue #407 - Heap-buffer-overflow (OSS-Fuzz issue 343)")
@ -638,31 +638,31 @@ TEST_CASE("regression tests")
std::vector<uint8_t> vec1 {0xcb, 0x8f, 0x0a};
CHECK_THROWS_AS(json::from_msgpack(vec1), json::parse_error);
CHECK_THROWS_WITH(json::from_msgpack(vec1),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 4: unexpected end of input");
// related test case: incomplete float32
std::vector<uint8_t> vec2 {0xca, 0x8f, 0x0a};
CHECK_THROWS_AS(json::from_msgpack(vec2), json::parse_error);
CHECK_THROWS_WITH(json::from_msgpack(vec2),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 4: unexpected end of input");
// related test case: incomplete Half-Precision Float (CBOR)
std::vector<uint8_t> vec3 {0xf9, 0x8f};
CHECK_THROWS_AS(json::from_cbor(vec3), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec3),
"[json.exception.parse_error.110] parse error at 2: cannot read 2 bytes from vector");
"[json.exception.parse_error.110] parse error at 3: unexpected end of input");
// related test case: incomplete Single-Precision Float (CBOR)
std::vector<uint8_t> vec4 {0xfa, 0x8f, 0x0a};
CHECK_THROWS_AS(json::from_cbor(vec4), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec4),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 4: unexpected end of input");
// related test case: incomplete Double-Precision Float (CBOR)
std::vector<uint8_t> vec5 {0xfb, 0x8f, 0x0a};
CHECK_THROWS_AS(json::from_cbor(vec5), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec5),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 4: unexpected end of input");
}
SECTION("issue #408 - Heap-buffer-overflow (OSS-Fuzz issue 344)")
@ -671,7 +671,7 @@ TEST_CASE("regression tests")
std::vector<uint8_t> vec1 {0x87};
CHECK_THROWS_AS(json::from_msgpack(vec1), json::parse_error);
CHECK_THROWS_WITH(json::from_msgpack(vec1),
"[json.exception.parse_error.110] parse error at 2: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
// more test cases for MessagePack
for (auto b :
@ -705,10 +705,10 @@ TEST_CASE("regression tests")
std::vector<uint8_t> vec2;
CHECK_THROWS_AS(json::from_cbor(vec2), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec2),
"[json.exception.parse_error.110] parse error at 1: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 1: unexpected end of input");
CHECK_THROWS_AS(json::from_msgpack(vec2), json::parse_error);
CHECK_THROWS_WITH(json::from_msgpack(vec2),
"[json.exception.parse_error.110] parse error at 1: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 1: unexpected end of input");
}
SECTION("issue #411 - Heap-buffer-overflow (OSS-Fuzz issue 366)")
@ -717,19 +717,19 @@ TEST_CASE("regression tests")
std::vector<uint8_t> vec1 {0x7f};
CHECK_THROWS_AS(json::from_cbor(vec1), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec1),
"[json.exception.parse_error.110] parse error at 2: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
// related test case: empty array (indefinite length)
std::vector<uint8_t> vec2 {0x9f};
CHECK_THROWS_AS(json::from_cbor(vec2), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec2),
"[json.exception.parse_error.110] parse error at 2: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
// related test case: empty map (indefinite length)
std::vector<uint8_t> vec3 {0xbf};
CHECK_THROWS_AS(json::from_cbor(vec3), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec3),
"[json.exception.parse_error.110] parse error at 2: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
}
SECTION("issue #412 - Heap-buffer-overflow (OSS-Fuzz issue 367)")
@ -763,19 +763,19 @@ TEST_CASE("regression tests")
std::vector<uint8_t> vec1 {0x7f, 0x61, 0x61};
CHECK_THROWS_AS(json::from_cbor(vec1), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec1),
"[json.exception.parse_error.110] parse error at 4: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 4: unexpected end of input");
// related test case: nonempty array (indefinite length)
std::vector<uint8_t> vec2 {0x9f, 0x01};
CHECK_THROWS_AS(json::from_cbor(vec2), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec2),
"[json.exception.parse_error.110] parse error at 3: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 3: unexpected end of input");
// related test case: nonempty map (indefinite length)
std::vector<uint8_t> vec3 {0xbf, 0x61, 0x61, 0x01};
CHECK_THROWS_AS(json::from_cbor(vec3), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec3),
"[json.exception.parse_error.110] parse error at 5: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 5: unexpected end of input");
}
SECTION("issue #414 - compare with literal 0)")
@ -921,6 +921,7 @@ TEST_CASE("regression tests")
CHECK(j["bool_vector"].dump() == "[false,true,false,false]");
}
/* NOTE: m_line_buffer is not used any more
SECTION("issue #495 - fill_line_buffer incorrectly tests m_stream for eof but not fail or bad bits")
{
SECTION("setting failbit")
@ -953,6 +954,7 @@ TEST_CASE("regression tests")
CHECK_THROWS_WITH(l.fill_line_buffer(), "[json.exception.parse_error.111] parse error: bad input stream");
}
}
*/
SECTION("issue #504 - assertion error (OSS-Fuzz 856)")
{

View file

@ -77,8 +77,8 @@ TEST_CASE("compliance tests from json.org")
})
{
CAPTURE(filename);
json j;
std::ifstream f(filename);
json j;
CHECK_THROWS_AS(f >> j, json::parse_error);
}
}
@ -93,8 +93,8 @@ TEST_CASE("compliance tests from json.org")
})
{
CAPTURE(filename);
json j;
std::ifstream f(filename);
json j;
CHECK_NOTHROW(f >> j);
}
}
@ -305,6 +305,7 @@ TEST_CASE("compliance tests from nativejson-benchmark")
std::string json_string( (std::istreambuf_iterator<char>(f) ),
(std::istreambuf_iterator<char>()) );
CAPTURE(json_string);
json j = json::parse(json_string);
CHECK(j.dump() == json_string);
}

View file

@ -34,17 +34,832 @@ using nlohmann::json;
#include <fstream>
// Build a JSON string literal from one to four raw bytes and check how
// json::parse reacts to it: with success_expected == true the parse must not
// throw, otherwise it must throw a json::parse_error. Bytes left at their
// default of -1 are treated as absent and are not appended.
void check_utf8string(bool success_expected, int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1)
{
    // opening quotation mark
    std::string json_string(1, '"');

    // the first byte is mandatory
    CAPTURE(byte1);
    json_string.push_back(static_cast<char>(byte1));

    // the remaining bytes are optional
    if (byte2 != -1)
    {
        CAPTURE(byte2);
        json_string.push_back(static_cast<char>(byte2));
    }

    if (byte3 != -1)
    {
        CAPTURE(byte3);
        json_string.push_back(static_cast<char>(byte3));
    }

    if (byte4 != -1)
    {
        CAPTURE(byte4);
        json_string.push_back(static_cast<char>(byte4));
    }

    // closing quotation mark
    json_string.push_back('"');
    CAPTURE(json_string);

    if (not success_expected)
    {
        CHECK_THROWS_AS(json::parse(json_string), json::parse_error);
        return;
    }

    CHECK_NOTHROW(json::parse(json_string));
}
TEST_CASE("Unicode", "[hide]")
{
SECTION("full enumeration of Unicode code points")
SECTION("RFC 3629")
{
// lexer to call to_unicode on
json::lexer dummy_lexer(reinterpret_cast<const json::lexer::lexer_char_t*>(""), 0);
/*
RFC 3629 describes in Sect. 4 the syntax of UTF-8 byte sequences as
follows:
A UTF-8 string is a sequence of octets representing a sequence of UCS
characters. An octet sequence is valid UTF-8 only if it matches the
following syntax, which is derived from the rules for encoding UTF-8
and is expressed in the ABNF of [RFC2234].
UTF8-octets = *( UTF8-char )
UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
UTF8-1 = %x00-7F
UTF8-2 = %xC2-DF UTF8-tail
UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
%xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
%xF4 %x80-8F 2( UTF8-tail )
UTF8-tail = %x80-BF
*/
SECTION("ill-formed first byte")
{
    // Per the grammar quoted above, a UTF8-char starts with x00-x7F or
    // xC2-xF4. Every other byte value (x80-xC1 and xF5-xFF) must be
    // rejected when it appears as the first byte.
    for (int first = 0x80; first <= 0xFF; ++first)
    {
        // xC2-xF4 are valid lead bytes of multi-byte sequences; skip them
        const bool valid_lead_byte = (0xC2 <= first and first <= 0xF4);
        if (not valid_lead_byte)
        {
            check_utf8string(false, first);
        }
    }
}
SECTION("UTF8-1 (x00-x7F)")
{
    SECTION("well-formed")
    {
        for (int first = 0x00; first <= 0x7F; ++first)
        {
            // Some single-byte characters are expected to be parse errors
            // when they occur unescaped inside a JSON string:
            // - control characters (x00-x1F)
            // - the quotation mark (x22)
            // - the reverse solidus, i.e. a lone backslash (x5C)
            const bool control_character = (first <= 0x1F);
            const bool quotation_mark = (first == 0x22);
            const bool reverse_solidus = (first == 0x5C);

            // all other characters are OK
            const bool accepted = not (control_character or quotation_mark or reverse_solidus);
            check_utf8string(accepted, first);
        }
    }
}
SECTION("UTF8-2 (xC2-xDF UTF8-tail)")
{
    // Two-byte sequences: a lead byte in xC2-xDF followed by one UTF8-tail
    // byte (x80-xBF).

    SECTION("well-formed")
    {
        for (int first = 0xC2; first <= 0xDF; ++first)
        {
            for (int second = 0x80; second <= 0xBF; ++second)
            {
                check_utf8string(true, first, second);
            }
        }
    }

    SECTION("ill-formed: missing second byte")
    {
        // the lead byte alone is a truncated sequence
        for (int first = 0xC2; first <= 0xDF; ++first)
        {
            check_utf8string(false, first);
        }
    }

    SECTION("ill-formed: wrong second byte")
    {
        for (int first = 0xC2; first <= 0xDF; ++first)
        {
            for (int second = 0x00; second <= 0xFF; ++second)
            {
                // second bytes inside x80-xBF are correct and not tested here
                const bool second_is_correct = (0x80 <= second and second <= 0xBF);
                if (not second_is_correct)
                {
                    check_utf8string(false, first, second);
                }
            }
        }
    }
}
SECTION("UTF8-3 (xE0 xA0-BF UTF8-tail)")
{
    // Three-byte sequences with lead byte xE0: the second byte must lie in
    // xA0-xBF and the third byte must be a UTF8-tail byte (x80-xBF).

    SECTION("well-formed")
    {
        const int first = 0xE0;
        for (int second = 0xA0; second <= 0xBF; ++second)
        {
            for (int third = 0x80; third <= 0xBF; ++third)
            {
                check_utf8string(true, first, second, third);
            }
        }
    }

    SECTION("ill-formed: missing second byte")
    {
        // the lead byte alone is a truncated sequence
        const int first = 0xE0;
        check_utf8string(false, first);
    }

    SECTION("ill-formed: missing third byte")
    {
        const int first = 0xE0;
        for (int second = 0xA0; second <= 0xBF; ++second)
        {
            check_utf8string(false, first, second);
        }
    }

    SECTION("ill-formed: wrong second byte")
    {
        const int first = 0xE0;
        for (int second = 0x00; second <= 0xFF; ++second)
        {
            // second bytes inside xA0-xBF are correct and not tested here
            const bool second_is_correct = (0xA0 <= second and second <= 0xBF);
            if (not second_is_correct)
            {
                for (int third = 0x80; third <= 0xBF; ++third)
                {
                    check_utf8string(false, first, second, third);
                }
            }
        }
    }

    SECTION("ill-formed: wrong third byte")
    {
        const int first = 0xE0;
        for (int second = 0xA0; second <= 0xBF; ++second)
        {
            for (int third = 0x00; third <= 0xFF; ++third)
            {
                // third bytes inside x80-xBF are correct and not tested here
                const bool third_is_correct = (0x80 <= third and third <= 0xBF);
                if (not third_is_correct)
                {
                    check_utf8string(false, first, second, third);
                }
            }
        }
    }
}
SECTION("UTF8-3 (xE1-xEC UTF8-tail UTF8-tail)")
{
    // Three-byte sequences with a lead byte in xE1-xEC: both following
    // bytes must be UTF8-tail bytes (x80-xBF).

    SECTION("well-formed")
    {
        for (int first = 0xE1; first <= 0xEC; ++first)
        {
            for (int second = 0x80; second <= 0xBF; ++second)
            {
                for (int third = 0x80; third <= 0xBF; ++third)
                {
                    check_utf8string(true, first, second, third);
                }
            }
        }
    }

    SECTION("ill-formed: missing second byte")
    {
        // the lead byte alone is a truncated sequence
        for (int first = 0xE1; first <= 0xEC; ++first)
        {
            check_utf8string(false, first);
        }
    }

    SECTION("ill-formed: missing third byte")
    {
        for (int first = 0xE1; first <= 0xEC; ++first)
        {
            for (int second = 0x80; second <= 0xBF; ++second)
            {
                check_utf8string(false, first, second);
            }
        }
    }

    SECTION("ill-formed: wrong second byte")
    {
        for (int first = 0xE1; first <= 0xEC; ++first)
        {
            for (int second = 0x00; second <= 0xFF; ++second)
            {
                // second bytes inside x80-xBF are correct and not tested here
                const bool second_is_correct = (0x80 <= second and second <= 0xBF);
                if (not second_is_correct)
                {
                    for (int third = 0x80; third <= 0xBF; ++third)
                    {
                        check_utf8string(false, first, second, third);
                    }
                }
            }
        }
    }

    SECTION("ill-formed: wrong third byte")
    {
        for (int first = 0xE1; first <= 0xEC; ++first)
        {
            for (int second = 0x80; second <= 0xBF; ++second)
            {
                for (int third = 0x00; third <= 0xFF; ++third)
                {
                    // third bytes inside x80-xBF are correct and not tested here
                    const bool third_is_correct = (0x80 <= third and third <= 0xBF);
                    if (not third_is_correct)
                    {
                        check_utf8string(false, first, second, third);
                    }
                }
            }
        }
    }
}
SECTION("UTF8-3 (xED x80-9F UTF8-tail)")
{
    // Three-byte sequences with lead byte xED: the second byte must lie in
    // x80-x9F and the third byte must be a UTF8-tail byte (x80-xBF).

    SECTION("well-formed")
    {
        const int first = 0xED;
        for (int second = 0x80; second <= 0x9F; ++second)
        {
            for (int third = 0x80; third <= 0xBF; ++third)
            {
                check_utf8string(true, first, second, third);
            }
        }
    }

    SECTION("ill-formed: missing second byte")
    {
        // the lead byte alone is a truncated sequence
        const int first = 0xED;
        check_utf8string(false, first);
    }

    SECTION("ill-formed: missing third byte")
    {
        const int first = 0xED;
        for (int second = 0x80; second <= 0x9F; ++second)
        {
            check_utf8string(false, first, second);
        }
    }

    SECTION("ill-formed: wrong second byte")
    {
        const int first = 0xED;
        for (int second = 0x00; second <= 0xFF; ++second)
        {
            // second bytes inside x80-x9F are correct and not tested here
            const bool second_is_correct = (0x80 <= second and second <= 0x9F);
            if (not second_is_correct)
            {
                for (int third = 0x80; third <= 0xBF; ++third)
                {
                    check_utf8string(false, first, second, third);
                }
            }
        }
    }

    SECTION("ill-formed: wrong third byte")
    {
        const int first = 0xED;
        for (int second = 0x80; second <= 0x9F; ++second)
        {
            for (int third = 0x00; third <= 0xFF; ++third)
            {
                // third bytes inside x80-xBF are correct and not tested here
                const bool third_is_correct = (0x80 <= third and third <= 0xBF);
                if (not third_is_correct)
                {
                    check_utf8string(false, first, second, third);
                }
            }
        }
    }
}
SECTION("UTF8-3 (xEE-xEF UTF8-tail UTF8-tail)")
{
    // Three-byte sequences with a lead byte in xEE-xEF: both following
    // bytes must be UTF8-tail bytes (x80-xBF).

    SECTION("well-formed")
    {
        for (int first = 0xEE; first <= 0xEF; ++first)
        {
            for (int second = 0x80; second <= 0xBF; ++second)
            {
                for (int third = 0x80; third <= 0xBF; ++third)
                {
                    check_utf8string(true, first, second, third);
                }
            }
        }
    }

    SECTION("ill-formed: missing second byte")
    {
        // the lead byte alone is a truncated sequence
        for (int first = 0xEE; first <= 0xEF; ++first)
        {
            check_utf8string(false, first);
        }
    }

    SECTION("ill-formed: missing third byte")
    {
        for (int first = 0xEE; first <= 0xEF; ++first)
        {
            for (int second = 0x80; second <= 0xBF; ++second)
            {
                check_utf8string(false, first, second);
            }
        }
    }

    SECTION("ill-formed: wrong second byte")
    {
        for (int first = 0xEE; first <= 0xEF; ++first)
        {
            for (int second = 0x00; second <= 0xFF; ++second)
            {
                // second bytes inside x80-xBF are correct and not tested here
                const bool second_is_correct = (0x80 <= second and second <= 0xBF);
                if (not second_is_correct)
                {
                    for (int third = 0x80; third <= 0xBF; ++third)
                    {
                        check_utf8string(false, first, second, third);
                    }
                }
            }
        }
    }

    SECTION("ill-formed: wrong third byte")
    {
        for (int first = 0xEE; first <= 0xEF; ++first)
        {
            for (int second = 0x80; second <= 0xBF; ++second)
            {
                for (int third = 0x00; third <= 0xFF; ++third)
                {
                    // third bytes inside x80-xBF are correct and not tested here
                    const bool third_is_correct = (0x80 <= third and third <= 0xBF);
                    if (not third_is_correct)
                    {
                        check_utf8string(false, first, second, third);
                    }
                }
            }
        }
    }
}
SECTION("UTF8-4 (xF0 x90-BF UTF8-tail UTF8-tail)")
{
    // Four-byte sequences with lead byte xF0: the second byte must lie in
    // x90-xBF; the third and fourth bytes must be UTF8-tail bytes (x80-xBF).

    SECTION("well-formed")
    {
        for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
        {
            for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
            {
                for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                {
                    for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
                    {
                        check_utf8string(true, byte1, byte2, byte3, byte4);
                    }
                }
            }
        }
    }

    SECTION("ill-formed: missing second byte")
    {
        for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
        {
            check_utf8string(false, byte1);
        }
    }

    SECTION("ill-formed: missing third byte")
    {
        for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
        {
            for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
            {
                check_utf8string(false, byte1, byte2);
            }
        }
    }

    SECTION("ill-formed: missing fourth byte")
    {
        for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
        {
            for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
            {
                for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                {
                    check_utf8string(false, byte1, byte2, byte3);
                }
            }
        }
    }

    SECTION("ill-formed: wrong second byte")
    {
        for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
        {
            for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
            {
                // skip correct second byte
                if (0x90 <= byte2 and byte2 <= 0xBF)
                {
                    continue;
                }

                for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                {
                    for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
                    {
                        check_utf8string(false, byte1, byte2, byte3, byte4);
                    }
                }
            }
        }
    }

    SECTION("ill-formed: wrong third byte")
    {
        for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
        {
            for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
            {
                for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
                {
                    // skip correct third byte
                    if (0x80 <= byte3 and byte3 <= 0xBF)
                    {
                        continue;
                    }

                    for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
                    {
                        check_utf8string(false, byte1, byte2, byte3, byte4);
                    }
                }
            }
        }
    }

    SECTION("ill-formed: wrong fourth byte")
    {
        for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
        {
            for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
            {
                for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                {
                    for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4)
                    {
                        // skip correct fourth byte; the guard must test
                        // byte4 (not byte3, which is always a valid tail
                        // here), otherwise no check would ever be executed
                        if (0x80 <= byte4 and byte4 <= 0xBF)
                        {
                            continue;
                        }

                        check_utf8string(false, byte1, byte2, byte3, byte4);
                    }
                }
            }
        }
    }
}
SECTION("UTF8-4 (xF1-F3 UTF8-tail UTF8-tail UTF8-tail)")
{
    // Four-byte sequences with a lead byte in xF1-xF3: all three following
    // bytes must be UTF8-tail bytes (x80-xBF).

    SECTION("well-formed")
    {
        for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
        {
            for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
            {
                for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                {
                    for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
                    {
                        check_utf8string(true, byte1, byte2, byte3, byte4);
                    }
                }
            }
        }
    }

    SECTION("ill-formed: missing second byte")
    {
        for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
        {
            check_utf8string(false, byte1);
        }
    }

    SECTION("ill-formed: missing third byte")
    {
        for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
        {
            for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
            {
                check_utf8string(false, byte1, byte2);
            }
        }
    }

    SECTION("ill-formed: missing fourth byte")
    {
        for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
        {
            for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
            {
                for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                {
                    check_utf8string(false, byte1, byte2, byte3);
                }
            }
        }
    }

    SECTION("ill-formed: wrong second byte")
    {
        for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
        {
            for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
            {
                // skip correct second byte
                if (0x80 <= byte2 and byte2 <= 0xBF)
                {
                    continue;
                }

                for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                {
                    for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
                    {
                        check_utf8string(false, byte1, byte2, byte3, byte4);
                    }
                }
            }
        }
    }

    SECTION("ill-formed: wrong third byte")
    {
        for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
        {
            for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
            {
                for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
                {
                    // skip correct third byte
                    if (0x80 <= byte3 and byte3 <= 0xBF)
                    {
                        continue;
                    }

                    for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
                    {
                        check_utf8string(false, byte1, byte2, byte3, byte4);
                    }
                }
            }
        }
    }

    SECTION("ill-formed: wrong fourth byte")
    {
        for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
        {
            for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
            {
                for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                {
                    for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4)
                    {
                        // skip correct fourth byte; the guard must test
                        // byte4 (not byte3, which is always a valid tail
                        // here), otherwise no check would ever be executed
                        if (0x80 <= byte4 and byte4 <= 0xBF)
                        {
                            continue;
                        }

                        check_utf8string(false, byte1, byte2, byte3, byte4);
                    }
                }
            }
        }
    }
}
SECTION("UTF8-4 (xF4 x80-8F UTF8-tail UTF8-tail)")
{
    // Four-byte sequences with lead byte xF4: the second byte must lie in
    // x80-x8F; the third and fourth bytes must be UTF8-tail bytes (x80-xBF).

    SECTION("well-formed")
    {
        for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
        {
            for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
            {
                for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                {
                    for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
                    {
                        check_utf8string(true, byte1, byte2, byte3, byte4);
                    }
                }
            }
        }
    }

    SECTION("ill-formed: missing second byte")
    {
        for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
        {
            check_utf8string(false, byte1);
        }
    }

    SECTION("ill-formed: missing third byte")
    {
        for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
        {
            for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
            {
                check_utf8string(false, byte1, byte2);
            }
        }
    }

    SECTION("ill-formed: missing fourth byte")
    {
        for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
        {
            for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
            {
                for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                {
                    check_utf8string(false, byte1, byte2, byte3);
                }
            }
        }
    }

    SECTION("ill-formed: wrong second byte")
    {
        for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
        {
            for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
            {
                // skip correct second byte
                if (0x80 <= byte2 and byte2 <= 0x8F)
                {
                    continue;
                }

                for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                {
                    for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
                    {
                        check_utf8string(false, byte1, byte2, byte3, byte4);
                    }
                }
            }
        }
    }

    SECTION("ill-formed: wrong third byte")
    {
        for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
        {
            for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
            {
                for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
                {
                    // skip correct third byte
                    if (0x80 <= byte3 and byte3 <= 0xBF)
                    {
                        continue;
                    }

                    for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
                    {
                        check_utf8string(false, byte1, byte2, byte3, byte4);
                    }
                }
            }
        }
    }

    SECTION("ill-formed: wrong fourth byte")
    {
        for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
        {
            for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
            {
                for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
                {
                    for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4)
                    {
                        // skip correct fourth byte; the guard must test
                        // byte4 (not byte3, which is always a valid tail
                        // here), otherwise no check would ever be executed
                        if (0x80 <= byte4 and byte4 <= 0xBF)
                        {
                            continue;
                        }

                        check_utf8string(false, byte1, byte2, byte3, byte4);
                    }
                }
            }
        }
    }
}
}
SECTION("\\uxxxx sequences")
{
// create an escaped string from a code point
const auto codepoint_to_unicode = [](std::size_t cp)
{
// copd points are represented as a six-character sequence: a
// code points are represented as a six-character sequence: a
// reverse solidus, followed by the lowercase letter u, followed
// by four hexadecimal digits that encode the character's code
// point
@ -53,70 +868,100 @@ TEST_CASE("Unicode", "[hide]")
return ss.str();
};
// generate all UTF-8 code points; in total, 1112064 code points are
// generated: 0x1FFFFF code points - 2048 invalid values between
// 0xD800 and 0xDFFF.
for (std::size_t cp = 0; cp <= 0x10FFFFu; ++cp)
SECTION("correct sequences")
{
// The Unicode standard permanently reserves these code point
// values for UTF-16 encoding of the high and low surrogates, and
// they will never be assigned a character, so there should be no
// reason to encode them. The official Unicode standard says that
// no UTF forms, including UTF-16, can encode these code points.
if (cp >= 0xD800u and cp <= 0xDFFFu)
// generate all UTF-8 code points; in total, 1112064 code points are
// generated: 0x110000 code points - 2048 invalid values between
// 0xD800 and 0xDFFF.
for (std::size_t cp = 0; cp <= 0x10FFFFu; ++cp)
{
// if we would not skip these code points, we would get a
// "missing low surrogate" exception
continue;
}
// string to store the code point as in \uxxxx format
std::string json_text = "\"";
// string to store the code point as in \uxxxx format
std::string escaped_string;
// string to store the code point as unescaped character sequence
std::string unescaped_string;
if (cp < 0x10000u)
{
// code points in the Basic Multilingual Plane can be
// represented with one \\uxxxx sequence
escaped_string = codepoint_to_unicode(cp);
// All Unicode characters may be placed within the quotation
// marks, except for the characters that must be escaped:
// quotation mark, reverse solidus, and the control characters
// (U+0000 through U+001F); we ignore these code points as
// they are checked with codepoint_to_unicode.
if (cp > 0x1f and cp != 0x22 and cp != 0x5c)
// decide whether to use one or two \uxxxx sequences
if (cp < 0x10000u)
{
unescaped_string = dummy_lexer.to_unicode(cp);
// The Unicode standard permanently reserves these code point
// values for UTF-16 encoding of the high and low surrogates, and
// they will never be assigned a character, so there should be no
// reason to encode them. The official Unicode standard says that
// no UTF forms, including UTF-16, can encode these code points.
if (cp >= 0xD800u and cp <= 0xDFFFu)
{
// if we would not skip these code points, we would get a
// "missing low surrogate" exception
continue;
}
// code points in the Basic Multilingual Plane can be
// represented with one \uxxxx sequence
json_text += codepoint_to_unicode(cp);
}
else
{
// To escape an extended character that is not in the Basic
// Multilingual Plane, the character is represented as a
// 12-character sequence, encoding the UTF-16 surrogate pair
const auto codepoint1 = 0xd800u + (((cp - 0x10000u) >> 10) & 0x3ffu);
const auto codepoint2 = 0xdc00u + ((cp - 0x10000u) & 0x3ffu);
json_text += codepoint_to_unicode(codepoint1) + codepoint_to_unicode(codepoint2);
}
json_text += "\"";
CAPTURE(json_text);
CHECK_NOTHROW(json::parse(json_text));
}
}
#if 0
SECTION("incorrect sequences")
{
SECTION("high surrogate without low surrogate")
{
// D800..DBFF are high surrogates and must be followed by low
// surrogates DC00..DFFF; here, nothing follows
for (std::size_t cp = 0xD800u; cp <= 0xDBFFu; ++cp)
{
std::string json_text = "\"" + codepoint_to_unicode(cp) + "\"";
CAPTURE(json_text);
CHECK_THROWS_AS(json::parse(json_text), json::parse_error);
}
}
else
SECTION("high surrogate with wrong low surrogate")
{
// To escape an extended character that is not in the Basic
// Multilingual Plane, the character is represented as a
// 12-character sequence, encoding the UTF-16 surrogate pair
const auto codepoint1 = 0xd800u + (((cp - 0x10000u) >> 10) & 0x3ffu);
const auto codepoint2 = 0xdc00u + ((cp - 0x10000u) & 0x3ffu);
escaped_string = codepoint_to_unicode(codepoint1);
escaped_string += codepoint_to_unicode(codepoint2);
unescaped_string += dummy_lexer.to_unicode(codepoint1, codepoint2);
// D800..DBFF are high surrogates and must be followed by low
// surrogates DC00..DFFF; here a different sequence follows
for (std::size_t cp1 = 0xD800u; cp1 <= 0xDBFFu; ++cp1)
{
for (std::size_t cp2 = 0x0000u; cp2 <= 0xFFFFu; ++cp2)
{
if (0xDC00u <= cp2 and cp2 <= 0xDFFFu)
{
continue;
}
std::string json_text = "\"" + codepoint_to_unicode(cp1) + codepoint_to_unicode(cp2) + "\"";
CAPTURE(json_text);
CHECK_THROWS_AS(json::parse(json_text), json::parse_error);
}
}
}
// all other code points are valid and must not yield parse errors
CAPTURE(cp);
CAPTURE(escaped_string);
CAPTURE(unescaped_string);
SECTION("low surrogate without high surrogate")
{
// low surrogates DC00..DFFF must follow high surrogates; here,
// they occur alone
for (std::size_t cp = 0xDC00u; cp <= 0xDFFFu; ++cp)
{
std::string json_text = "\"" + codepoint_to_unicode(cp) + "\"";
CAPTURE(json_text);
CHECK_THROWS_AS(json::parse(json_text), json::parse_error);
}
}
json j1, j2, j3, j4;
CHECK_NOTHROW(j1 = json::parse("\"" + escaped_string + "\""));
CHECK_NOTHROW(j2 = json::parse(j1.dump()));
CHECK(j1 == j2);
CHECK_NOTHROW(j3 = json::parse("\"" + unescaped_string + "\""));
CHECK_NOTHROW(j4 = json::parse(j3.dump()));
CHECK(j3 == j4);
}
#endif
}
SECTION("read all unicode characters")