Merge branch 'feature/manual_lexer' into develop
commit 56ac7908f1
14 changed files with 5887 additions and 17227 deletions

.gitignore (vendored): 3 changes
@@ -30,3 +30,6 @@ test/parse_afl_fuzzer
test/parse_cbor_fuzzer
test/parse_msgpack_fuzzer
minibench
Makefile: 19 changes
@@ -1,9 +1,5 @@
.PHONY: pretty clean ChangeLog.md

# used programs
RE2C := $(shell command -v re2c 2> /dev/null)
SED = sed

# main target
all:
	$(MAKE) -C test
@@ -51,7 +47,8 @@ doctest:
# -Wno-keyword-macro: unit-tests use "#define private public"
# -Wno-deprecated-declarations: the library deprecated some functions
# -Wno-weak-vtables: exception class is defined inline, but has virtual method
# -Wno-range-loop-analysis: iterator_wrapper tests tests "for(const auto i...)"
# -Wno-range-loop-analysis: iterator_wrapper tests "for(const auto i...)"
# -Wno-float-equal: not all comparisons in the tests can be replaced by Approx
pedantic_clang:
	$(MAKE) json_unit CXXFLAGS="\
	-std=c++11 \
@@ -62,7 +59,8 @@ pedantic_clang:
	-Wno-keyword-macro \
	-Wno-deprecated-declarations \
	-Wno-weak-vtables \
	-Wno-range-loop-analysis"
	-Wno-range-loop-analysis \
	-Wno-float-equal"

# calling GCC with most warnings
pedantic_gcc:
@@ -186,13 +184,6 @@ clang_sanitize: clean
# maintainer targets
##########################################################################

# create scanner with re2c
re2c: src/json.hpp.re2c
ifndef RE2C
	$(error "re2c is not available, please install re2c")
endif
	$(RE2C) -W --utf-8 --encoding-policy fail --bit-vectors --nested-ifs --no-debug-info $< | $(SED) '1d' > src/json.hpp

# pretty printer
pretty:
	astyle --style=allman --indent=spaces=4 --indent-modifiers \
@@ -200,7 +191,7 @@ pretty:
	--indent-col1-comments --pad-oper --pad-header --align-pointer=type \
	--align-reference=type --add-brackets --convert-tabs --close-templates \
	--lineend=linux --preserve-date --suffix=none --formatted \
	src/json.hpp src/json.hpp.re2c test/src/*.cpp \
	src/json.hpp test/src/*.cpp \
	benchmarks/benchmarks.cpp doc/examples/*.cpp
@@ -899,7 +899,7 @@ $ make json_unit -Ctest
$ ./test/json_unit "*"

===============================================================================
All tests passed (11203022 assertions in 48 test cases)
All tests passed (13391115 assertions in 49 test cases)
```

Alternatively, you can use [CMake](https://cmake.org) and run
src/json.hpp: 7622 changes (file diff suppressed because it is too large)
src/json.hpp.re2c: 13064 changes (file diff suppressed because it is too large)
@@ -28,7 +28,6 @@ SOFTWARE.

#include "catch.hpp"

#define private public
#include "json.hpp"
using nlohmann::json;
@@ -728,14 +727,9 @@ TEST_CASE("CBOR")
const auto result = json::to_cbor(j);
CHECK(result == expected);

// restore value (reverse array for endianess)
double restored;
std::reverse(expected.begin(), expected.end());
memcpy(&restored, expected.data(), sizeof(double));
CHECK(restored == v);

// roundtrip
CHECK(json::from_cbor(result) == j);
CHECK(json::from_cbor(result) == v);
}
}
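The hunk above drops the byte-order check on the serialized bytes and keeps only the roundtrip through the binary writer and reader. A short sketch of that roundtrip property, not taken from the commit; the sample value is chosen here purely for illustration:

```cpp
#include <cassert>
#include "json.hpp"
using nlohmann::json;

int main()
{
    const double v = 3.1415925;          // hypothetical sample value
    const json j = v;

    const auto bytes = json::to_cbor(j); // serialize to CBOR
    assert(json::from_cbor(bytes) == j); // the JSON value survives the roundtrip
    assert(json::from_cbor(bytes) == v); // and still compares equal to the raw double
}
```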
@@ -1166,35 +1160,35 @@ TEST_CASE("CBOR")
CHECK_THROWS_AS(json::from_cbor(std::vector<uint8_t>({0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})), json::parse_error);

CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x18})),
"[json.exception.parse_error.110] parse error at 2: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x19})),
"[json.exception.parse_error.110] parse error at 2: cannot read 2 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x19, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 2 bytes from vector");
"[json.exception.parse_error.110] parse error at 3: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1a})),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1a, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 3: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1a, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 4: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1a, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 5: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1b})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1b, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 3: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1b, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 4: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1b, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 5: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1b, 0x00, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 6: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1b, 0x00, 0x00, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 7: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 8: unexpected end of input");
CHECK_THROWS_WITH(json::from_cbor(std::vector<uint8_t>({0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 9: unexpected end of input");
}

SECTION("unsupported bytes")
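This hunk captures the user-visible effect of the rework on truncated CBOR input: the exception now reports the byte position at which the input ended ("unexpected end of input") instead of how many bytes a single read wanted. A minimal sketch, not part of the diff, that triggers the new diagnostic; the expected message is the one asserted above for the same byte sequence:

```cpp
#include <cstdio>
#include <vector>
#include "json.hpp"
using nlohmann::json;

int main()
{
    // CBOR uint32 marker (0x1a) with only two of its four payload bytes
    std::vector<uint8_t> truncated = {0x1a, 0x00, 0x00};
    try
    {
        json::from_cbor(truncated);
    }
    catch (const json::parse_error& e)
    {
        // "[json.exception.parse_error.110] parse error at 4: unexpected end of input"
        printf("%s\n", e.what());
    }
}
```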
@@ -1357,12 +1351,6 @@ TEST_CASE("CBOR regressions", "[!throws]")
}
}
}

SECTION("improve code coverage")
{
// exotic edge case
CHECK_THROWS_AS(json::check_length(0xffffffffffffffffull, 0xfffffffffffffff0ull, 0xff), json::parse_error);
}
}

TEST_CASE("CBOR roundtrips", "[hide]")
@@ -1756,7 +1744,7 @@ TEST_CASE("examples from RFC 7049 Appendix A")
CHECK(json::parse("\"\\ud800\\udd51\"") == json::from_cbor(std::vector<uint8_t>({0x64, 0xf0, 0x90, 0x85, 0x91})));

// indefinite length strings
CHECK(json::parse("\"streaming\"") == json::from_cbor(std::vector<uint8_t>({0x7f, 0x65, 0x73, 0x74, 0x72, 0x65, 0x61, 0x64, 0x6d, 0x69, 0x6e, 0x67, 0xff})));
CHECK(json::parse("\"streaming\"") == json::from_cbor(std::vector<uint8_t>({0x7f, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0xff})));
}

SECTION("arrays")
@@ -32,106 +32,84 @@ SOFTWARE.
#include "json.hpp"
using nlohmann::json;

// shortcut to scan a string literal
json::lexer::token_type scan_string(const char* s);
json::lexer::token_type scan_string(const char* s)
{
return json::lexer(json::input_adapter::create(s)).scan();
}

TEST_CASE("lexer class")
{
SECTION("scan")
{
SECTION("structural characters")
{
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("["),
1).scan() == json::lexer::token_type::begin_array));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("]"),
1).scan() == json::lexer::token_type::end_array));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("{"),
1).scan() == json::lexer::token_type::begin_object));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("}"),
1).scan() == json::lexer::token_type::end_object));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>(","),
1).scan() == json::lexer::token_type::value_separator));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>(":"),
1).scan() == json::lexer::token_type::name_separator));
CHECK((scan_string("[") == json::lexer::token_type::begin_array));
CHECK((scan_string("]") == json::lexer::token_type::end_array));
CHECK((scan_string("{") == json::lexer::token_type::begin_object));
CHECK((scan_string("}") == json::lexer::token_type::end_object));
CHECK((scan_string(",") == json::lexer::token_type::value_separator));
CHECK((scan_string(":") == json::lexer::token_type::name_separator));
}

SECTION("literal names")
{
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("null"),
4).scan() == json::lexer::token_type::literal_null));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("true"),
4).scan() == json::lexer::token_type::literal_true));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("false"),
5).scan() == json::lexer::token_type::literal_false));
CHECK((scan_string("null") == json::lexer::token_type::literal_null));
CHECK((scan_string("true") == json::lexer::token_type::literal_true));
CHECK((scan_string("false") == json::lexer::token_type::literal_false));
}

SECTION("numbers")
{
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("0"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("1"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("2"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("3"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("4"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("5"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("6"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("7"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("8"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("9"),
1).scan() == json::lexer::token_type::value_unsigned));
CHECK((scan_string("0") == json::lexer::token_type::value_unsigned));
CHECK((scan_string("1") == json::lexer::token_type::value_unsigned));
CHECK((scan_string("2") == json::lexer::token_type::value_unsigned));
CHECK((scan_string("3") == json::lexer::token_type::value_unsigned));
CHECK((scan_string("4") == json::lexer::token_type::value_unsigned));
CHECK((scan_string("5") == json::lexer::token_type::value_unsigned));
CHECK((scan_string("6") == json::lexer::token_type::value_unsigned));
CHECK((scan_string("7") == json::lexer::token_type::value_unsigned));
CHECK((scan_string("8") == json::lexer::token_type::value_unsigned));
CHECK((scan_string("9") == json::lexer::token_type::value_unsigned));

CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("-0"),
2).scan() == json::lexer::token_type::value_integer));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("-1"),
2).scan() == json::lexer::token_type::value_integer));
CHECK((scan_string("-0") == json::lexer::token_type::value_integer));
CHECK((scan_string("-1") == json::lexer::token_type::value_integer));

CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("1.1"),
3).scan() == json::lexer::token_type::value_float));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("-1.1"),
4).scan() == json::lexer::token_type::value_float));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("1E10"),
4).scan() == json::lexer::token_type::value_float));
CHECK((scan_string("1.1") == json::lexer::token_type::value_float));
CHECK((scan_string("-1.1") == json::lexer::token_type::value_float));
CHECK((scan_string("1E10") == json::lexer::token_type::value_float));
}

SECTION("whitespace")
{
// result is end_of_input, because not token is following
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>(" "),
1).scan() == json::lexer::token_type::end_of_input));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("\t"),
1).scan() == json::lexer::token_type::end_of_input));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("\n"),
1).scan() == json::lexer::token_type::end_of_input));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("\r"),
1).scan() == json::lexer::token_type::end_of_input));
CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>(" \t\n\r\n\t "),
7).scan() == json::lexer::token_type::end_of_input));
CHECK((scan_string(" ") == json::lexer::token_type::end_of_input));
CHECK((scan_string("\t") == json::lexer::token_type::end_of_input));
CHECK((scan_string("\n") == json::lexer::token_type::end_of_input));
CHECK((scan_string("\r") == json::lexer::token_type::end_of_input));
CHECK((scan_string(" \t\n\r\n\t ") == json::lexer::token_type::end_of_input));
}
}

SECTION("token_type_name")
{
CHECK((json::lexer::token_type_name(json::lexer::token_type::uninitialized) == "<uninitialized>"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::literal_true) == "true literal"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::literal_false) == "false literal"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::literal_null) == "null literal"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::value_string) == "string literal"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::value_unsigned) == "number literal"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::value_integer) == "number literal"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::value_float) == "number literal"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::begin_array) == "'['"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::begin_object) == "'{'"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::end_array) == "']'"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::end_object) == "'}'"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::name_separator) == "':'"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::value_separator) == "','"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::parse_error) == "<parse error>"));
CHECK((json::lexer::token_type_name(json::lexer::token_type::end_of_input) == "end of input"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::uninitialized)) == "<uninitialized>"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::literal_true)) == "true literal"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::literal_false)) == "false literal"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::literal_null)) == "null literal"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_string)) == "string literal"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_unsigned)) == "number literal"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_integer)) == "number literal"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_float)) == "number literal"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::begin_array)) == "'['"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::begin_object)) == "'{'"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::end_array)) == "']'"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::end_object)) == "'}'"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::name_separator)) == "':'"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_separator)) == "','"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::parse_error)) == "<parse error>"));
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::end_of_input)) == "end of input"));
}

SECTION("parse errors on first character")

@@ -141,8 +119,7 @@ TEST_CASE("lexer class")
// create string from the ASCII code
const auto s = std::string(1, static_cast<char>(c));
// store scan() result
const auto res = json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>(s.c_str()),
1).scan();
const auto res = scan_string(s.c_str());

switch (c)
{

@@ -188,12 +165,23 @@ TEST_CASE("lexer class")
}
}

SECTION("very large string")
{
// strings larger than 1024 bytes yield a resize of the lexer's yytext buffer
std::string s("\"");
s += std::string(2048, 'x');
s += "\"";
CHECK((scan_string(s.c_str()) == json::lexer::token_type::value_string));
}

/* NOTE: to_unicode function has been removed
SECTION("to_unicode")
{
// lexer to call to_unicode on
json::lexer dummy_lexer(reinterpret_cast<const json::lexer::lexer_char_t*>(""), 0);
json::lexer dummy_lexer("", 0);
CHECK(dummy_lexer.to_unicode(0x1F4A9) == "💩");
CHECK_THROWS_AS(dummy_lexer.to_unicode(0x200000), json::parse_error);
CHECK_THROWS_WITH(dummy_lexer.to_unicode(0x200000), "[json.exception.parse_error.103] parse error: code points above 0x10FFFF are invalid");
}
*/
}
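The hunks above replace the raw-pointer lexer constructor with the input_adapter entry point used by the new scan_string helper. The following sketch repeats that pattern outside the Catch framework; it assumes the lexer is reachable the same way the test files reach it (the CBOR test file uses #define private public before including json.hpp), which may not hold for regular client code:

```cpp
#define private public   // assumption: mirrors how the test suite exposes internals
#include <cassert>
#include "json.hpp"
using nlohmann::json;

int main()
{
    auto scan = [](const char* s)
    {
        // same call chain as the scan_string helper in the diff
        return json::lexer(json::input_adapter::create(s)).scan();
    };

    assert(scan("true")   == json::lexer::token_type::literal_true);
    assert(scan("-1.5")   == json::lexer::token_type::value_float);
    assert(scan(" \t\n")  == json::lexer::token_type::end_of_input);
}
```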
File diff suppressed because it is too large
@@ -53,7 +53,7 @@ TEST_CASE("convenience functions")
const char* escaped)
{
std::stringstream ss;
json::serializer s(ss);
json::serializer s(json::output_adapter<char>::create(ss));
s.dump_escaped(original);
CHECK(ss.str() == escaped);
};
@@ -92,7 +92,7 @@ TEST_CASE("deserialization")
ss2 << "[\"foo\",1,2,3,false,{\"one\":1}";
CHECK_THROWS_AS(json::parse(ss1), json::parse_error);
CHECK_THROWS_WITH(json::parse(ss2),
"[json.exception.parse_error.101] parse error at 30: parse error - unexpected end of input; expected ']'");
"[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
}

SECTION("string")

@@ -100,7 +100,7 @@ TEST_CASE("deserialization")
json::string_t s = "[\"foo\",1,2,3,false,{\"one\":1}";
CHECK_THROWS_AS(json::parse(s), json::parse_error);
CHECK_THROWS_WITH(json::parse(s),
"[json.exception.parse_error.101] parse error at 29: parse error - unexpected end of input; expected ']'");
"[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
}

SECTION("operator<<")

@@ -111,7 +111,7 @@ TEST_CASE("deserialization")
json j;
CHECK_THROWS_AS(j << ss1, json::parse_error);
CHECK_THROWS_WITH(j << ss2,
"[json.exception.parse_error.101] parse error at 30: parse error - unexpected end of input; expected ']'");
"[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
}

SECTION("operator>>")

@@ -122,14 +122,14 @@ TEST_CASE("deserialization")
json j;
CHECK_THROWS_AS(ss1 >> j, json::parse_error);
CHECK_THROWS_WITH(ss2 >> j,
"[json.exception.parse_error.101] parse error at 30: parse error - unexpected end of input; expected ']'");
"[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
}

SECTION("user-defined string literal")
{
CHECK_THROWS_AS("[\"foo\",1,2,3,false,{\"one\":1}"_json, json::parse_error);
CHECK_THROWS_WITH("[\"foo\",1,2,3,false,{\"one\":1}"_json,
"[json.exception.parse_error.101] parse error at 29: parse error - unexpected end of input; expected ']'");
"[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
}
}
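Across these hunks the wording changes from "parse error" to "syntax error" and the reported position changes from 30 to 29: the parser now reports the position right after the last character it consumed, and the truncated document used here is 28 characters long. A small sketch, not from the commit, that prints the new message:

```cpp
#include <cstdio>
#include "json.hpp"
using nlohmann::json;

int main()
{
    try
    {
        // 28-character document missing its closing ']'
        json::parse("[\"foo\",1,2,3,false,{\"one\":1}");
    }
    catch (const json::parse_error& e)
    {
        // "[json.exception.parse_error.101] parse error at 29: syntax error -
        //  unexpected end of input; expected ']'"
        printf("%s\n", e.what());
    }
}
```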
@@ -676,14 +676,9 @@ TEST_CASE("MessagePack")
const auto result = json::to_msgpack(j);
CHECK(result == expected);

// restore value (reverse array for endianess)
double restored;
std::reverse(expected.begin(), expected.end());
memcpy(&restored, expected.data(), sizeof(double));
CHECK(restored == v);

// roundtrip
CHECK(json::from_msgpack(result) == j);
CHECK(json::from_msgpack(result) == v);
}
}
}

@@ -1038,35 +1033,35 @@ TEST_CASE("MessagePack")
CHECK_THROWS_AS(json::from_msgpack(std::vector<uint8_t>({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})), json::parse_error);

CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcc})),
"[json.exception.parse_error.110] parse error at 2: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcd})),
"[json.exception.parse_error.110] parse error at 2: cannot read 2 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcd, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 2 bytes from vector");
"[json.exception.parse_error.110] parse error at 3: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xce})),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xce, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 3: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xce, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 4: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xce, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 5: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcf})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcf, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 3: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcf, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 4: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcf, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 5: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcf, 0x00, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 6: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 7: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 8: unexpected end of input");
CHECK_THROWS_WITH(json::from_msgpack(std::vector<uint8_t>({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 9: unexpected end of input");
}

SECTION("unsupported bytes")
@@ -596,7 +596,7 @@ TEST_CASE("regression tests")
// a parse error because of the EOF.
CHECK_THROWS_AS(ss >> j, json::parse_error);
CHECK_THROWS_WITH(ss >> j,
"[json.exception.parse_error.101] parse error at 1: parse error - unexpected end of input");
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input");
}

SECTION("issue #389 - Integer-overflow (OSS-Fuzz issue 267)")

@@ -629,7 +629,7 @@ TEST_CASE("regression tests")
std::vector<uint8_t> vec {0x65, 0xf5, 0x0a, 0x48, 0x21};
CHECK_THROWS_AS(json::from_cbor(vec), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec),
"[json.exception.parse_error.110] parse error at 2: cannot read 5 bytes from vector");
"[json.exception.parse_error.110] parse error at 6: unexpected end of input");
}

SECTION("issue #407 - Heap-buffer-overflow (OSS-Fuzz issue 343)")

@@ -638,31 +638,31 @@ TEST_CASE("regression tests")
std::vector<uint8_t> vec1 {0xcb, 0x8f, 0x0a};
CHECK_THROWS_AS(json::from_msgpack(vec1), json::parse_error);
CHECK_THROWS_WITH(json::from_msgpack(vec1),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 4: unexpected end of input");

// related test case: incomplete float32
std::vector<uint8_t> vec2 {0xca, 0x8f, 0x0a};
CHECK_THROWS_AS(json::from_msgpack(vec2), json::parse_error);
CHECK_THROWS_WITH(json::from_msgpack(vec2),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 4: unexpected end of input");

// related test case: incomplete Half-Precision Float (CBOR)
std::vector<uint8_t> vec3 {0xf9, 0x8f};
CHECK_THROWS_AS(json::from_cbor(vec3), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec3),
"[json.exception.parse_error.110] parse error at 2: cannot read 2 bytes from vector");
"[json.exception.parse_error.110] parse error at 3: unexpected end of input");

// related test case: incomplete Single-Precision Float (CBOR)
std::vector<uint8_t> vec4 {0xfa, 0x8f, 0x0a};
CHECK_THROWS_AS(json::from_cbor(vec4), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec4),
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector");
"[json.exception.parse_error.110] parse error at 4: unexpected end of input");

// related test case: incomplete Double-Precision Float (CBOR)
std::vector<uint8_t> vec5 {0xfb, 0x8f, 0x0a};
CHECK_THROWS_AS(json::from_cbor(vec5), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec5),
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector");
"[json.exception.parse_error.110] parse error at 4: unexpected end of input");
}

SECTION("issue #408 - Heap-buffer-overflow (OSS-Fuzz issue 344)")

@@ -671,7 +671,7 @@ TEST_CASE("regression tests")
std::vector<uint8_t> vec1 {0x87};
CHECK_THROWS_AS(json::from_msgpack(vec1), json::parse_error);
CHECK_THROWS_WITH(json::from_msgpack(vec1),
"[json.exception.parse_error.110] parse error at 2: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");

// more test cases for MessagePack
for (auto b :

@@ -705,10 +705,10 @@ TEST_CASE("regression tests")
std::vector<uint8_t> vec2;
CHECK_THROWS_AS(json::from_cbor(vec2), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec2),
"[json.exception.parse_error.110] parse error at 1: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 1: unexpected end of input");
CHECK_THROWS_AS(json::from_msgpack(vec2), json::parse_error);
CHECK_THROWS_WITH(json::from_msgpack(vec2),
"[json.exception.parse_error.110] parse error at 1: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 1: unexpected end of input");
}

SECTION("issue #411 - Heap-buffer-overflow (OSS-Fuzz issue 366)")

@@ -717,19 +717,19 @@ TEST_CASE("regression tests")
std::vector<uint8_t> vec1 {0x7f};
CHECK_THROWS_AS(json::from_cbor(vec1), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec1),
"[json.exception.parse_error.110] parse error at 2: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");

// related test case: empty array (indefinite length)
std::vector<uint8_t> vec2 {0x9f};
CHECK_THROWS_AS(json::from_cbor(vec2), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec2),
"[json.exception.parse_error.110] parse error at 2: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");

// related test case: empty map (indefinite length)
std::vector<uint8_t> vec3 {0xbf};
CHECK_THROWS_AS(json::from_cbor(vec3), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec3),
"[json.exception.parse_error.110] parse error at 2: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 2: unexpected end of input");
}

SECTION("issue #412 - Heap-buffer-overflow (OSS-Fuzz issue 367)")

@@ -763,19 +763,19 @@ TEST_CASE("regression tests")
std::vector<uint8_t> vec1 {0x7f, 0x61, 0x61};
CHECK_THROWS_AS(json::from_cbor(vec1), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec1),
"[json.exception.parse_error.110] parse error at 4: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 4: unexpected end of input");

// related test case: nonempty array (indefinite length)
std::vector<uint8_t> vec2 {0x9f, 0x01};
CHECK_THROWS_AS(json::from_cbor(vec2), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec2),
"[json.exception.parse_error.110] parse error at 3: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 3: unexpected end of input");

// related test case: nonempty map (indefinite length)
std::vector<uint8_t> vec3 {0xbf, 0x61, 0x61, 0x01};
CHECK_THROWS_AS(json::from_cbor(vec3), json::parse_error);
CHECK_THROWS_WITH(json::from_cbor(vec3),
"[json.exception.parse_error.110] parse error at 5: cannot read 1 bytes from vector");
"[json.exception.parse_error.110] parse error at 5: unexpected end of input");
}

SECTION("issue #414 - compare with literal 0)")

@@ -921,6 +921,7 @@ TEST_CASE("regression tests")
CHECK(j["bool_vector"].dump() == "[false,true,false,false]");
}

/* NOTE: m_line_buffer is not used any more
SECTION("issue #495 - fill_line_buffer incorrectly tests m_stream for eof but not fail or bad bits")
{
SECTION("setting failbit")

@@ -953,6 +954,7 @@ TEST_CASE("regression tests")
CHECK_THROWS_WITH(l.fill_line_buffer(), "[json.exception.parse_error.111] parse error: bad input stream");
}
}
*/

SECTION("issue #504 - assertion error (OSS-Fuzz 856)")
{
@@ -77,8 +77,8 @@ TEST_CASE("compliance tests from json.org")
})
{
CAPTURE(filename);
json j;
std::ifstream f(filename);
json j;
CHECK_THROWS_AS(f >> j, json::parse_error);
}
}

@@ -93,8 +93,8 @@ TEST_CASE("compliance tests from json.org")
})
{
CAPTURE(filename);
json j;
std::ifstream f(filename);
json j;
CHECK_NOTHROW(f >> j);
}
}

@@ -305,6 +305,7 @@ TEST_CASE("compliance tests from nativejson-benchmark")
std::string json_string( (std::istreambuf_iterator<char>(f) ),
(std::istreambuf_iterator<char>()) );

CAPTURE(json_string);
json j = json::parse(json_string);
CHECK(j.dump() == json_string);
}
@@ -34,17 +34,832 @@ using nlohmann::json;

#include <fstream>

// create and check a JSON string with up to four UTF-8 bytes
void check_utf8string(bool success_expected, int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1)
{
std::string json_string = "\"";

CAPTURE(byte1);
json_string += std::string(1, static_cast<char>(byte1));

if (byte2 != -1)
{
CAPTURE(byte2);
json_string += std::string(1, static_cast<char>(byte2));
}

if (byte3 != -1)
{
CAPTURE(byte3);
json_string += std::string(1, static_cast<char>(byte3));
}

if (byte4 != -1)
{
CAPTURE(byte4);
json_string += std::string(1, static_cast<char>(byte4));
}

json_string += "\"";

CAPTURE(json_string);

if (success_expected)
{
CHECK_NOTHROW(json::parse(json_string));
}
else
{
CHECK_THROWS_AS(json::parse(json_string), json::parse_error);
}
}

TEST_CASE("Unicode", "[hide]")
{
SECTION("full enumeration of Unicode code points")
SECTION("RFC 3629")
{
// lexer to call to_unicode on
json::lexer dummy_lexer(reinterpret_cast<const json::lexer::lexer_char_t*>(""), 0);
/*
RFC 3629 describes in Sect. 4 the syntax of UTF-8 byte sequences as
follows:

A UTF-8 string is a sequence of octets representing a sequence of UCS
characters. An octet sequence is valid UTF-8 only if it matches the
following syntax, which is derived from the rules for encoding UTF-8
and is expressed in the ABNF of [RFC2234].

UTF8-octets = *( UTF8-char )
UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
UTF8-1 = %x00-7F
UTF8-2 = %xC2-DF UTF8-tail
UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
%xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
%xF4 %x80-8F 2( UTF8-tail )
UTF8-tail = %x80-BF
*/
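The RFC 3629 grammar quoted above drives all well-formed and ill-formed byte ranges enumerated in the sections that follow. For cross-checking individual cases by hand, here is a small standalone validator sketch that encodes the same ranges; it is independent of the library and not part of the test suite:

```cpp
#include <cstddef>
#include <cstdint>

// returns true if the byte sequence matches the RFC 3629 UTF8-octets grammar
bool is_valid_utf8(const uint8_t* s, size_t n)
{
    for (size_t i = 0; i < n;)
    {
        const uint8_t b = s[i];
        size_t len;
        uint8_t lo = 0x80, hi = 0xBF;                          // default UTF8-tail range

        if (b <= 0x7F)                    { i += 1; continue; } // UTF8-1
        else if (b >= 0xC2 and b <= 0xDF) { len = 2; }          // UTF8-2
        else if (b == 0xE0)               { len = 3; lo = 0xA0; }
        else if (b >= 0xE1 and b <= 0xEC) { len = 3; }          // UTF8-3
        else if (b == 0xED)               { len = 3; hi = 0x9F; }
        else if (b >= 0xEE and b <= 0xEF) { len = 3; }
        else if (b == 0xF0)               { len = 4; lo = 0x90; }
        else if (b >= 0xF1 and b <= 0xF3) { len = 4; }          // UTF8-4
        else if (b == 0xF4)               { len = 4; hi = 0x8F; }
        else                              { return false; }     // C0, C1, F5..FF

        if (i + len > n)                         return false;  // truncated sequence
        if (s[i + 1] < lo or s[i + 1] > hi)      return false;  // restricted second byte
        for (size_t k = 2; k < len; ++k)                         // remaining plain tails
            if (s[i + k] < 0x80 or s[i + k] > 0xBF) return false;
        i += len;
    }
    return true;
}
```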
SECTION("ill-formed first byte")
|
||||
{
|
||||
for (int byte1 = 0x80; byte1 <= 0xC1; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
}
|
||||
|
||||
for (int byte1 = 0xF5; byte1 <= 0xFF; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("UTF8-1 (x00-x7F)")
|
||||
{
|
||||
SECTION("well-formed")
|
||||
{
|
||||
for (int byte1 = 0x00; byte1 <= 0x7F; ++byte1)
|
||||
{
|
||||
// unescaped control characters are parse errors in JSON
|
||||
if (0x00 <= byte1 and byte1 <= 0x1F)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
continue;
|
||||
}
|
||||
|
||||
// a single quote is a parse error in JSON
|
||||
if (byte1 == 0x22)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
continue;
|
||||
}
|
||||
|
||||
// a single backslash is a parse error in JSON
|
||||
if (byte1 == 0x5C)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
continue;
|
||||
}
|
||||
|
||||
// all other characters are OK
|
||||
check_utf8string(true, byte1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("UTF8-2 (xC2-xDF UTF8-tail)")
|
||||
{
|
||||
SECTION("well-formed")
|
||||
{
|
||||
for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
check_utf8string(true, byte1, byte2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: missing second byte")
|
||||
{
|
||||
for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: wrong second byte")
|
||||
{
|
||||
for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
|
||||
{
|
||||
// skip correct second byte
|
||||
if (0x80 <= byte2 and byte2 <= 0xBF)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
check_utf8string(false, byte1, byte2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("UTF8-3 (xE0 xA0-BF UTF8-tail)")
|
||||
{
|
||||
SECTION("well-formed")
|
||||
{
|
||||
for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(true, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: missing second byte")
|
||||
{
|
||||
for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: missing third byte")
|
||||
{
|
||||
for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: wrong second byte")
|
||||
{
|
||||
for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
|
||||
{
|
||||
// skip correct second byte
|
||||
if (0xA0 <= byte2 and byte2 <= 0xBF)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: wrong third byte")
|
||||
{
|
||||
for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
|
||||
{
|
||||
// skip correct third byte
|
||||
if (0x80 <= byte3 and byte3 <= 0xBF)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("UTF8-3 (xE1-xEC UTF8-tail UTF8-tail)")
|
||||
{
|
||||
SECTION("well-formed")
|
||||
{
|
||||
for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(true, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: missing second byte")
|
||||
{
|
||||
for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: missing third byte")
|
||||
{
|
||||
for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: wrong second byte")
|
||||
{
|
||||
for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
|
||||
{
|
||||
// skip correct second byte
|
||||
if (0x80 <= byte2 and byte2 <= 0xBF)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: wrong third byte")
|
||||
{
|
||||
for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
|
||||
{
|
||||
// skip correct third byte
|
||||
if (0x80 <= byte3 and byte3 <= 0xBF)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("UTF8-3 (xED x80-9F UTF8-tail)")
|
||||
{
|
||||
SECTION("well-formed")
|
||||
{
|
||||
for (int byte1 = 0xED; byte1 <= 0xED; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(true, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: missing second byte")
|
||||
{
|
||||
for (int byte1 = 0xED; byte1 <= 0xED; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: missing third byte")
|
||||
{
|
||||
for (int byte1 = 0xED; byte1 <= 0xED; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: wrong second byte")
|
||||
{
|
||||
for (int byte1 = 0xED; byte1 <= 0xED; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
|
||||
{
|
||||
// skip correct second byte
|
||||
if (0x80 <= byte2 and byte2 <= 0x9F)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: wrong third byte")
|
||||
{
|
||||
for (int byte1 = 0xED; byte1 <= 0xED; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
|
||||
{
|
||||
// skip correct third byte
|
||||
if (0x80 <= byte3 and byte3 <= 0xBF)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("UTF8-3 (xEE-xEF UTF8-tail UTF8-tail)")
|
||||
{
|
||||
SECTION("well-formed")
|
||||
{
|
||||
for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(true, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: missing second byte")
|
||||
{
|
||||
for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: missing third byte")
|
||||
{
|
||||
for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: wrong second byte")
|
||||
{
|
||||
for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
|
||||
{
|
||||
// skip correct second byte
|
||||
if (0x80 <= byte2 and byte2 <= 0xBF)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: wrong third byte")
|
||||
{
|
||||
for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
|
||||
{
|
||||
// skip correct third byte
|
||||
if (0x80 <= byte3 and byte3 <= 0xBF)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("UTF8-4 (xF0 x90-BF UTF8-tail UTF8-tail)")
|
||||
{
|
||||
SECTION("well-formed")
|
||||
{
|
||||
for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
|
||||
{
|
||||
check_utf8string(true, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: missing second byte")
|
||||
{
|
||||
for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: missing third byte")
|
||||
{
|
||||
for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: missing fourth byte")
|
||||
{
|
||||
for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: wrong second byte")
|
||||
{
|
||||
for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
|
||||
{
|
||||
// skip correct second byte
|
||||
if (0x90 <= byte2 and byte2 <= 0xBF)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: wrong third byte")
|
||||
{
|
||||
for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
|
||||
{
|
||||
// skip correct third byte
|
||||
if (0x80 <= byte3 and byte3 <= 0xBF)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: wrong fourth byte")
|
||||
{
|
||||
for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4)
|
||||
{
|
||||
// skip fourth second byte
|
||||
if (0x80 <= byte3 and byte3 <= 0xBF)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
check_utf8string(false, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("UTF8-4 (xF1-F3 UTF8-tail UTF8-tail UTF8-tail)")
|
||||
{
|
||||
SECTION("well-formed")
|
||||
{
|
||||
for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
|
||||
{
|
||||
check_utf8string(true, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: missing second byte")
|
||||
{
|
||||
for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: missing third byte")
|
||||
{
|
||||
for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: missing fourth byte")
|
||||
{
|
||||
for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: wrong second byte")
|
||||
{
|
||||
for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
|
||||
{
|
||||
// skip correct second byte
|
||||
if (0x80 <= byte2 and byte2 <= 0xBF)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: wrong third byte")
|
||||
{
|
||||
for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
|
||||
{
|
||||
// skip correct third byte
|
||||
if (0x80 <= byte3 and byte3 <= 0xBF)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: wrong fourth byte")
|
||||
{
|
||||
for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4)
|
||||
{
|
||||
// skip correct fourth byte
|
||||
if (0x80 <= byte3 and byte3 <= 0xBF)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
check_utf8string(false, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("UTF8-4 (xF4 x80-8F UTF8-tail UTF8-tail)")
|
||||
{
|
||||
SECTION("well-formed")
|
||||
{
|
||||
for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
|
||||
{
|
||||
check_utf8string(true, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: missing second byte")
|
||||
{
|
||||
for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: missing third byte")
|
||||
{
|
||||
for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: missing fourth byte")
|
||||
{
|
||||
for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: wrong second byte")
|
||||
{
|
||||
for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
|
||||
{
|
||||
// skip correct second byte
|
||||
if (0x80 <= byte2 and byte2 <= 0x8F)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: wrong third byte")
|
||||
{
|
||||
for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
|
||||
{
|
||||
// skip correct third byte
|
||||
if (0x80 <= byte3 and byte3 <= 0xBF)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("ill-formed: wrong fourth byte")
|
||||
{
|
||||
for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
|
||||
{
|
||||
for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
|
||||
{
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4)
|
||||
{
|
||||
// skip correct fourth byte
|
||||
if (0x80 <= byte3 and byte3 <= 0xBF)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
check_utf8string(false, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("\\uxxxx sequences")
|
||||
{
|
||||
// create an escaped string from a code point
|
||||
const auto codepoint_to_unicode = [](std::size_t cp)
|
||||
{
|
||||
// copd points are represented as a six-character sequence: a
|
||||
// code points are represented as a six-character sequence: a
|
||||
// reverse solidus, followed by the lowercase letter u, followed
|
||||
// by four hexadecimal digits that encode the character's code
|
||||
// point
|
||||
|
@ -53,70 +868,100 @@ TEST_CASE("Unicode", "[hide]")
|
|||
return ss.str();
|
||||
};
|
||||
|
||||
// generate all UTF-8 code points; in total, 1112064 code points are
|
||||
// generated: 0x1FFFFF code points - 2048 invalid values between
|
||||
// 0xD800 and 0xDFFF.
|
||||
for (std::size_t cp = 0; cp <= 0x10FFFFu; ++cp)
|
||||
SECTION("correct sequences")
|
||||
{
|
||||
// The Unicode standard permanently reserves these code point
|
||||
// values for UTF-16 encoding of the high and low surrogates, and
|
||||
// they will never be assigned a character, so there should be no
|
||||
// reason to encode them. The official Unicode standard says that
|
||||
// no UTF forms, including UTF-16, can encode these code points.
|
||||
if (cp >= 0xD800u and cp <= 0xDFFFu)
|
||||
// generate all UTF-8 code points; in total, 1112064 code points are
|
||||
// generated: 0x1FFFFF code points - 2048 invalid values between
|
||||
// 0xD800 and 0xDFFF.
|
||||
for (std::size_t cp = 0; cp <= 0x10FFFFu; ++cp)
|
||||
{
|
||||
// if we would not skip these code points, we would get a
|
||||
// "missing low surrogate" exception
|
||||
continue;
|
||||
}
|
||||
// string to store the code point as in \uxxxx format
|
||||
std::string json_text = "\"";
|
||||
|
||||
// string to store the code point as in \uxxxx format
|
||||
std::string escaped_string;
|
||||
// string to store the code point as unescaped character sequence
|
||||
std::string unescaped_string;
|
||||
|
||||
if (cp < 0x10000u)
|
||||
{
|
||||
// code points in the Basic Multilingual Plane can be
|
||||
// represented with one \\uxxxx sequence
|
||||
escaped_string = codepoint_to_unicode(cp);
|
||||
|
||||
// All Unicode characters may be placed within the quotation
|
||||
// marks, except for the characters that must be escaped:
|
||||
// quotation mark, reverse solidus, and the control characters
|
||||
// (U+0000 through U+001F); we ignore these code points as
|
||||
// they are checked with codepoint_to_unicode.
|
||||
if (cp > 0x1f and cp != 0x22 and cp != 0x5c)
|
||||
// decide whether to use one or two \uxxxx sequences
|
||||
if (cp < 0x10000u)
|
||||
{
|
||||
unescaped_string = dummy_lexer.to_unicode(cp);
|
||||
// The Unicode standard permanently reserves these code point
|
||||
// values for UTF-16 encoding of the high and low surrogates, and
|
||||
// they will never be assigned a character, so there should be no
|
||||
// reason to encode them. The official Unicode standard says that
|
||||
// no UTF forms, including UTF-16, can encode these code points.
|
||||
if (cp >= 0xD800u and cp <= 0xDFFFu)
|
||||
{
|
||||
// if we would not skip these code points, we would get a
|
||||
// "missing low surrogate" exception
|
||||
continue;
|
||||
}
|
||||
|
||||
// code points in the Basic Multilingual Plane can be
|
||||
// represented with one \uxxxx sequence
|
||||
json_text += codepoint_to_unicode(cp);
|
||||
}
|
||||
else
|
||||
{
|
||||
// To escape an extended character that is not in the Basic
|
||||
// Multilingual Plane, the character is represented as a
|
||||
// 12-character sequence, encoding the UTF-16 surrogate pair
|
||||
const auto codepoint1 = 0xd800u + (((cp - 0x10000u) >> 10) & 0x3ffu);
|
||||
const auto codepoint2 = 0xdc00u + ((cp - 0x10000u) & 0x3ffu);
|
||||
json_text += codepoint_to_unicode(codepoint1) + codepoint_to_unicode(codepoint2);
|
||||
}
|
||||
|
||||
json_text += "\"";
|
||||
CAPTURE(json_text);
|
||||
CHECK_NOTHROW(json::parse(json_text));
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
SECTION("incorrect sequences")
|
||||
{
|
||||
SECTION("high surrogate without low surrogate")
|
||||
{
|
||||
// D800..DBFF are high surrogates and must be followed by low
|
||||
// surrogates DC00..DFFF; here, nothing follows
|
||||
for (std::size_t cp = 0xD800u; cp <= 0xDBFFu; ++cp)
|
||||
{
|
||||
std::string json_text = "\"" + codepoint_to_unicode(cp) + "\"";
|
||||
CAPTURE(json_text);
|
||||
CHECK_THROWS_AS(json::parse(json_text), json::parse_error);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
||||
SECTION("high surrogate with wrong low surrogate")
|
||||
{
|
||||
// To escape an extended character that is not in the Basic
|
||||
// Multilingual Plane, the character is represented as a
|
||||
// 12-character sequence, encoding the UTF-16 surrogate pair
|
||||
const auto codepoint1 = 0xd800u + (((cp - 0x10000u) >> 10) & 0x3ffu);
|
||||
const auto codepoint2 = 0xdc00u + ((cp - 0x10000u) & 0x3ffu);
|
||||
escaped_string = codepoint_to_unicode(codepoint1);
|
||||
escaped_string += codepoint_to_unicode(codepoint2);
|
||||
unescaped_string += dummy_lexer.to_unicode(codepoint1, codepoint2);
|
||||
// D800..DBFF are high surrogates and must be followed by low
|
||||
// surrogates DC00..DFFF; here a different sequence follows
|
||||
for (std::size_t cp1 = 0xD800u; cp1 <= 0xDBFFu; ++cp1)
|
||||
{
|
||||
for (std::size_t cp2 = 0x0000u; cp2 <= 0xFFFFu; ++cp2)
|
||||
{
|
||||
if (0xDC00u <= cp2 and cp2 <= 0xDFFFu)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
std::string json_text = "\"" + codepoint_to_unicode(cp1) + codepoint_to_unicode(cp2) + "\"";
|
||||
CAPTURE(json_text);
|
||||
CHECK_THROWS_AS(json::parse(json_text), json::parse_error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// all other code points are valid and must not yield parse errors
|
||||
CAPTURE(cp);
|
||||
CAPTURE(escaped_string);
|
||||
CAPTURE(unescaped_string);
|
||||
SECTION("low surrogate without high surrogate")
|
||||
{
|
||||
// low surrogates DC00..DFFF must follow high surrogates; here,
|
||||
// they occur alone
|
||||
for (std::size_t cp = 0xDC00u; cp <= 0xDFFFu; ++cp)
|
||||
{
|
||||
std::string json_text = "\"" + codepoint_to_unicode(cp) + "\"";
|
||||
CAPTURE(json_text);
|
||||
CHECK_THROWS_AS(json::parse(json_text), json::parse_error);
|
||||
}
|
||||
}
|
||||
|
||||
json j1, j2, j3, j4;
|
||||
CHECK_NOTHROW(j1 = json::parse("\"" + escaped_string + "\""));
|
||||
CHECK_NOTHROW(j2 = json::parse(j1.dump()));
|
||||
CHECK(j1 == j2);
|
||||
|
||||
CHECK_NOTHROW(j3 = json::parse("\"" + unescaped_string + "\""));
|
||||
CHECK_NOTHROW(j4 = json::parse(j3.dump()));
|
||||
CHECK(j3 == j4);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
SECTION("read all unicode characters")
|
||||
|