From 40160f482a2cbd96b1f511397d14905e108a9ca1 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Fri, 24 Mar 2017 19:49:02 +0100 Subject: [PATCH 01/44] :construction: manual lexer This commit removed the re2c lexer and replaced it with a manual version. Its integration is not yet complete: number parsing does not respect locales or overflows. Furthermore, parsing does not yet need to end with EOF. Therefore, a lot of test cases fail. The idea is to push this branch forward so we can conduct performance comparisons. So far, a nice side effect is better diagnostic messages in case of parse errors. --- Makefile | 13 +- errors.txt | 7 + src/json.hpp | 2409 ++---- src/json.hpp.re2c | 12863 ---------------------------- test/src/unit-class_lexer.cpp | 94 +- test/src/unit-class_parser.cpp | 146 +- test/src/unit-deserialization.cpp | 10 +- test/src/unit-regression.cpp | 4 +- test/src/unit-unicode.cpp | 4 +- 9 files changed, 851 insertions(+), 14699 deletions(-) create mode 100644 errors.txt delete mode 100644 src/json.hpp.re2c diff --git a/Makefile b/Makefile index 20857022..38d40eda 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,5 @@ .PHONY: pretty clean ChangeLog.md -# used programs -RE2C := $(shell command -v re2c 2> /dev/null) -SED = sed - # main target all: $(MAKE) -C test @@ -183,13 +179,6 @@ clang_sanitize: clean # maintainer targets ########################################################################## -# create scanner with re2c -re2c: src/json.hpp.re2c -ifndef RE2C - $(error "re2c is not available, please install re2c") -endif - $(RE2C) -W --utf-8 --encoding-policy fail --bit-vectors --nested-ifs --no-debug-info $< | $(SED) '1d' > src/json.hpp - # pretty printer pretty: astyle --style=allman --indent=spaces=4 --indent-modifiers \ @@ -197,7 +186,7 @@ pretty: --indent-col1-comments --pad-oper --pad-header --align-pointer=type \ --align-reference=type --add-brackets --convert-tabs --close-templates \ --lineend=linux --preserve-date --suffix=none --formatted \ 
src/json.hpp src/json.hpp.re2c test/src/*.cpp \ + src/json.hpp test/src/*.cpp \ benchmarks/benchmarks.cpp doc/examples/*.cpp diff --git a/errors.txt b/errors.txt new file mode 100644 index 00000000..d3a6c6db --- /dev/null +++ b/errors.txt @@ -0,0 +1,7 @@ +- test/test-class_parser + - 617 failed +- test/test-regression + - 11 failed +- test/test-testsuites + - 43 failed + diff --git a/src/json.hpp b/src/json.hpp index 1ccbfa61..b86bee11 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -89,6 +89,10 @@ SOFTWARE. #define JSON_CATCH(exception) if(false) #endif +// manual branch prediction +#define JSON_LIKELY(x) __builtin_expect(!!(x), 1) +#define JSON_UNLIKELY(x) __builtin_expect(!!(x), 0) + /*! @brief namespace for Niels Lohmann @see https://github.com/nlohmann @@ -1968,7 +1972,7 @@ class basic_json default: { - if (t == value_t::null) + if (JSON_UNLIKELY(t == value_t::null)) { JSON_THROW(other_error(500, "961c151d2e87f2686a955a9be24d316f1362bf21 2.1.1")); // LCOV_EXCL_LINE } @@ -10213,12 +10217,195 @@ class basic_json /*! @brief lexical analysis - This class organizes the lexical analysis during JSON deserialization. The - core of it is a scanner generated by [re2c](http://re2c.org) that - processes a buffer and recognizes tokens according to RFC 7159. + This class organizes the lexical analysis during JSON deserialization. 
*/ class lexer { + private: + + /// abstract input adapter interface + class input_adapter + { + public: + virtual int get_character() = 0; + virtual std::string read(size_t offset, size_t length) = 0; + virtual ~input_adapter() {} + }; + + /// input adapter for cached stream input + class cached_input_stream_adapter : public input_adapter + { + public: + cached_input_stream_adapter(std::istream& i) + : is(i), start_position(is.tellg()), + buffer(1024 * 1024, std::char_traits::eof()) + { + // immediately abort if stream is erroneous + if (JSON_UNLIKELY(i.fail())) + { + JSON_THROW(parse_error(111, 0, "bad input stream")); + } + + // initial fill; unfilled buffer charaters remain EOF + is.read(buffer.data(), static_cast(buffer.size())); + + // ignore byte-order mark + if (buffer[0] == '\xEF' and buffer[1] == '\xBB' and buffer[2] == '\xBF') + { + buffer_pos += 3; + processed_chars += 3; + } + } + + ~cached_input_stream_adapter() override + { + // clear stream flags + is.clear(); + // set stream after last processed char + is.seekg(start_position + static_cast(processed_chars - 1)); + } + + int get_character() override + { + // check if refilling is neccessary + if (JSON_UNLIKELY(buffer_pos == buffer.size())) + { + // refill + is.read(reinterpret_cast(buffer.data()), static_cast(buffer.size())); + // set unfilled characters to EOF + std::fill_n(buffer.begin() + is.gcount(), + buffer.size() - static_cast(is.gcount()), + std::char_traits::eof()); + // the buffer is ready + buffer_pos = 0; + } + + ++processed_chars; + return buffer[buffer_pos++]; + } + + std::string read(size_t offset, size_t length) override + { + // create buffer + std::string result(length, '\0'); + + // save stream position + auto current_pos = is.tellg(); + // save stream flags + auto flags = is.rdstate(); + + // clear stream flags + is.clear(); + // set stream position + is.seekg(static_cast(offset)); + // read bytes + is.read(&result[0], static_cast(length)); + + // reset stream position + 
is.seekg(current_pos); + // reset stream flags + is.setstate(flags); + + return result; + } + + private: + std::istream& is; + + // chars returned via get_character() + size_t processed_chars = 0; + // chars processed in the current buffer + size_t buffer_pos = 0; + + // position of the stream when we started + const std::streampos start_position; + + // internal buffer + std::vector buffer; + }; + + /// input adapter for uncached stream input + class input_stream_adapter : public input_adapter + { + public: + input_stream_adapter(std::istream& i) + : is(i) + { + // immediately abort if stream is erroneous + if (i.fail()) + { + JSON_THROW(parse_error(111, 0, "bad input stream")); + } + } + + int get_character() override + { + return is.get(); + } + + std::string read(size_t offset, size_t length) override + { + // create buffer + std::string result(length, '\0'); + + // save stream position + auto current_pos = is.tellg(); + // save stream flags + auto flags = is.rdstate(); + + // clear stream flags + is.clear(); + // set stream position + is.seekg(offset); + // read bytes + is.read(&result[0], length); + + // reset stream position + is.seekg(current_pos); + // reset stream flags + is.setstate(flags); + + return result; + } + + private: + std::istream& is; + }; + + /// input adapter for buffer input + class input_buffer_adapter : public input_adapter + { + public: + input_buffer_adapter(const char* b, size_t l) + : input_adapter(), cursor(b), limit(b + l), start(b) + {} + + input_buffer_adapter(const input_buffer_adapter&) = delete; + input_buffer_adapter& operator=(input_buffer_adapter&) = delete; + + int get_character() override + { + if (JSON_LIKELY(cursor < limit)) + { + return *cursor++; + } + else + { + return std::char_traits::eof(); + } + } + + std::string read(size_t offset, size_t length) override + { + return std::string(start + offset, length); + } + + private: + const char* cursor; + const char* limit; + const char* start; + }; + public: /// token 
types for the parser enum class token_type @@ -10241,135 +10428,6 @@ class basic_json end_of_input ///< indicating the end of the input buffer }; - /// the char type to use in the lexer - using lexer_char_t = unsigned char; - - /// a lexer from a buffer with given length - lexer(const lexer_char_t* buff, const size_t len) noexcept - : m_content(buff) - { - assert(m_content != nullptr); - m_start = m_cursor = m_content; - m_limit = m_content + len; - } - - /*! - @brief a lexer from an input stream - @throw parse_error.111 if input stream is in a bad state - */ - explicit lexer(std::istream& s) - : m_stream(&s), m_line_buffer() - { - // immediately abort if stream is erroneous - if (s.fail()) - { - JSON_THROW(parse_error(111, 0, "bad input stream")); - } - - // fill buffer - fill_line_buffer(); - - // skip UTF-8 byte-order mark - if (m_line_buffer.size() >= 3 and m_line_buffer.substr(0, 3) == "\xEF\xBB\xBF") - { - m_line_buffer[0] = ' '; - m_line_buffer[1] = ' '; - m_line_buffer[2] = ' '; - } - } - - // switch off unwanted functions (due to pointer members) - lexer() = delete; - lexer(const lexer&) = delete; - lexer operator=(const lexer&) = delete; - - /*! - @brief create a string from one or two Unicode code points - - There are two cases: (1) @a codepoint1 is in the Basic Multilingual - Plane (U+0000 through U+FFFF) and @a codepoint2 is 0, or (2) - @a codepoint1 and @a codepoint2 are a UTF-16 surrogate pair to - represent a code point above U+FFFF. - - @param[in] codepoint1 the code point (can be high surrogate) - @param[in] codepoint2 the code point (can be low surrogate or 0) - - @return string representation of the code point; the length of the - result string is between 1 and 4 characters. - - @throw parse_error.102 if the low surrogate is invalid; example: - `""missing or wrong low surrogate""` - @throw parse_error.103 if code point is > 0x10ffff; example: `"code - points above 0x10FFFF are invalid"` - - @complexity Constant. 
- - @see - */ - string_t to_unicode(const std::size_t codepoint1, - const std::size_t codepoint2 = 0) const - { - // calculate the code point from the given code points - std::size_t codepoint = codepoint1; - - // check if codepoint1 is a high surrogate - if (codepoint1 >= 0xD800 and codepoint1 <= 0xDBFF) - { - // check if codepoint2 is a low surrogate - if (codepoint2 >= 0xDC00 and codepoint2 <= 0xDFFF) - { - codepoint = - // high surrogate occupies the most significant 22 bits - (codepoint1 << 10) - // low surrogate occupies the least significant 15 bits - + codepoint2 - // there is still the 0xD800, 0xDC00 and 0x10000 noise - // in the result so we have to subtract with: - // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00 - - 0x35FDC00; - } - else - { - JSON_THROW(parse_error(102, get_position(), "missing or wrong low surrogate")); - } - } - - string_t result; - - if (codepoint < 0x80) - { - // 1-byte characters: 0xxxxxxx (ASCII) - result.append(1, static_cast(codepoint)); - } - else if (codepoint <= 0x7ff) - { - // 2-byte characters: 110xxxxx 10xxxxxx - result.append(1, static_cast(0xC0 | ((codepoint >> 6) & 0x1F))); - result.append(1, static_cast(0x80 | (codepoint & 0x3F))); - } - else if (codepoint <= 0xffff) - { - // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx - result.append(1, static_cast(0xE0 | ((codepoint >> 12) & 0x0F))); - result.append(1, static_cast(0x80 | ((codepoint >> 6) & 0x3F))); - result.append(1, static_cast(0x80 | (codepoint & 0x3F))); - } - else if (codepoint <= 0x10ffff) - { - // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - result.append(1, static_cast(0xF0 | ((codepoint >> 18) & 0x07))); - result.append(1, static_cast(0x80 | ((codepoint >> 12) & 0x3F))); - result.append(1, static_cast(0x80 | ((codepoint >> 6) & 0x3F))); - result.append(1, static_cast(0x80 | (codepoint & 0x3F))); - } - else - { - JSON_THROW(parse_error(103, get_position(), "code points above 0x10FFFF are invalid")); - } - - return result; - } - /// return name of 
values of type token_type (only used for errors) static std::string token_type_name(const token_type t) { @@ -10413,1615 +10471,584 @@ class basic_json } } - /*! - This function implements a scanner for JSON. It is specified using - regular expressions that try to follow RFC 7159 as close as possible. - These regular expressions are then translated into a minimized - deterministic finite automaton (DFA) by the tool - [re2c](http://re2c.org). As a result, the translated code for this - function consists of a large block of code with `goto` jumps. + explicit lexer(std::istream& i) + // : ia(new input_stream_adapter(i)) + : ia(new cached_input_stream_adapter(i)) + {} - @return the class of the next token read from the buffer + lexer(const char* buff, const size_t len) + : ia(new input_buffer_adapter(buff, len)) + {} - @complexity Linear in the length of the input.\n - - Proposition: The loop below will always terminate for finite input.\n - - Proof (by contradiction): Assume a finite input. To loop forever, the - loop must never hit code with a `break` statement. The only code - snippets without a `break` statement is the continue statement for - whitespace. To loop forever, the input must be an infinite sequence - whitespace. This contradicts the assumption of finite input, q.e.d. 
- */ - token_type scan() + ~lexer() { - while (true) - { - // pointer for backtracking information - m_marker = nullptr; - - // remember the begin of the token - m_start = m_cursor; - assert(m_start != nullptr); - - - { - lexer_char_t yych; - unsigned int yyaccept = 0; - static const unsigned char yybm[] = - { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 32, 32, 0, 0, 32, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 160, 128, 0, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 0, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - }; - if ((m_limit - m_cursor) < 5) - { - fill_line_buffer(5); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yybm[0 + yych] & 32) - { - goto basic_json_parser_6; - } - if (yych <= '[') - { - if (yych <= '-') - { - if (yych <= '"') - { - if (yych <= 0x00) - { - goto basic_json_parser_2; - } - if (yych <= '!') - { - goto basic_json_parser_4; - } - goto basic_json_parser_9; - } - else - { - if (yych <= '+') - { - goto basic_json_parser_4; - } - if (yych <= ',') - { - goto basic_json_parser_10; - } - goto basic_json_parser_12; - } - } - else - { - if (yych <= '9') - { - if (yych <= '/') - { - goto basic_json_parser_4; - } - if (yych <= '0') - { - goto 
basic_json_parser_13; - } - goto basic_json_parser_15; - } - else - { - if (yych <= ':') - { - goto basic_json_parser_17; - } - if (yych <= 'Z') - { - goto basic_json_parser_4; - } - goto basic_json_parser_19; - } - } - } - else - { - if (yych <= 'n') - { - if (yych <= 'e') - { - if (yych == ']') - { - goto basic_json_parser_21; - } - goto basic_json_parser_4; - } - else - { - if (yych <= 'f') - { - goto basic_json_parser_23; - } - if (yych <= 'm') - { - goto basic_json_parser_4; - } - goto basic_json_parser_24; - } - } - else - { - if (yych <= 'z') - { - if (yych == 't') - { - goto basic_json_parser_25; - } - goto basic_json_parser_4; - } - else - { - if (yych <= '{') - { - goto basic_json_parser_26; - } - if (yych == '}') - { - goto basic_json_parser_28; - } - goto basic_json_parser_4; - } - } - } -basic_json_parser_2: - ++m_cursor; - { - last_token_type = token_type::end_of_input; - break; - } -basic_json_parser_4: - ++m_cursor; -basic_json_parser_5: - { - last_token_type = token_type::parse_error; - break; - } -basic_json_parser_6: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yybm[0 + yych] & 32) - { - goto basic_json_parser_6; - } - { - position += static_cast((m_cursor - m_start)); - continue; - } -basic_json_parser_9: - yyaccept = 0; - yych = *(m_marker = ++m_cursor); - if (yych <= 0x1F) - { - goto basic_json_parser_5; - } - if (yych <= 0x7F) - { - goto basic_json_parser_31; - } - if (yych <= 0xC1) - { - goto basic_json_parser_5; - } - if (yych <= 0xF4) - { - goto basic_json_parser_31; - } - goto basic_json_parser_5; -basic_json_parser_10: - ++m_cursor; - { - last_token_type = token_type::value_separator; - break; - } -basic_json_parser_12: - yych = *++m_cursor; - if (yych <= '/') - { - goto basic_json_parser_5; - } - if (yych <= '0') - { - goto basic_json_parser_43; - } - if (yych <= '9') - { - goto basic_json_parser_45; - } - goto basic_json_parser_5; -basic_json_parser_13: - yyaccept = 
1; - yych = *(m_marker = ++m_cursor); - if (yych <= '9') - { - if (yych == '.') - { - goto basic_json_parser_47; - } - if (yych >= '0') - { - goto basic_json_parser_48; - } - } - else - { - if (yych <= 'E') - { - if (yych >= 'E') - { - goto basic_json_parser_51; - } - } - else - { - if (yych == 'e') - { - goto basic_json_parser_51; - } - } - } -basic_json_parser_14: - { - last_token_type = token_type::value_unsigned; - break; - } -basic_json_parser_15: - yyaccept = 1; - m_marker = ++m_cursor; - if ((m_limit - m_cursor) < 3) - { - fill_line_buffer(3); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yybm[0 + yych] & 64) - { - goto basic_json_parser_15; - } - if (yych <= 'D') - { - if (yych == '.') - { - goto basic_json_parser_47; - } - goto basic_json_parser_14; - } - else - { - if (yych <= 'E') - { - goto basic_json_parser_51; - } - if (yych == 'e') - { - goto basic_json_parser_51; - } - goto basic_json_parser_14; - } -basic_json_parser_17: - ++m_cursor; - { - last_token_type = token_type::name_separator; - break; - } -basic_json_parser_19: - ++m_cursor; - { - last_token_type = token_type::begin_array; - break; - } -basic_json_parser_21: - ++m_cursor; - { - last_token_type = token_type::end_array; - break; - } -basic_json_parser_23: - yyaccept = 0; - yych = *(m_marker = ++m_cursor); - if (yych == 'a') - { - goto basic_json_parser_52; - } - goto basic_json_parser_5; -basic_json_parser_24: - yyaccept = 0; - yych = *(m_marker = ++m_cursor); - if (yych == 'u') - { - goto basic_json_parser_53; - } - goto basic_json_parser_5; -basic_json_parser_25: - yyaccept = 0; - yych = *(m_marker = ++m_cursor); - if (yych == 'r') - { - goto basic_json_parser_54; - } - goto basic_json_parser_5; -basic_json_parser_26: - ++m_cursor; - { - last_token_type = token_type::begin_object; - break; - } -basic_json_parser_28: - ++m_cursor; - { - last_token_type = token_type::end_object; - break; - } -basic_json_parser_30: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // 
LCOV_EXCL_LINE - } - yych = *m_cursor; -basic_json_parser_31: - if (yybm[0 + yych] & 128) - { - goto basic_json_parser_30; - } - if (yych <= 0xE0) - { - if (yych <= '\\') - { - if (yych <= 0x1F) - { - goto basic_json_parser_32; - } - if (yych <= '"') - { - goto basic_json_parser_33; - } - goto basic_json_parser_35; - } - else - { - if (yych <= 0xC1) - { - goto basic_json_parser_32; - } - if (yych <= 0xDF) - { - goto basic_json_parser_36; - } - goto basic_json_parser_37; - } - } - else - { - if (yych <= 0xEF) - { - if (yych == 0xED) - { - goto basic_json_parser_39; - } - goto basic_json_parser_38; - } - else - { - if (yych <= 0xF0) - { - goto basic_json_parser_40; - } - if (yych <= 0xF3) - { - goto basic_json_parser_41; - } - if (yych <= 0xF4) - { - goto basic_json_parser_42; - } - } - } -basic_json_parser_32: - m_cursor = m_marker; - if (yyaccept <= 1) - { - if (yyaccept == 0) - { - goto basic_json_parser_5; - } - else - { - goto basic_json_parser_14; - } - } - else - { - if (yyaccept == 2) - { - goto basic_json_parser_44; - } - else - { - goto basic_json_parser_58; - } - } -basic_json_parser_33: - ++m_cursor; - { - last_token_type = token_type::value_string; - break; - } -basic_json_parser_35: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= 'e') - { - if (yych <= '/') - { - if (yych == '"') - { - goto basic_json_parser_30; - } - if (yych <= '.') - { - goto basic_json_parser_32; - } - goto basic_json_parser_30; - } - else - { - if (yych <= '\\') - { - if (yych <= '[') - { - goto basic_json_parser_32; - } - goto basic_json_parser_30; - } - else - { - if (yych == 'b') - { - goto basic_json_parser_30; - } - goto basic_json_parser_32; - } - } - } - else - { - if (yych <= 'q') - { - if (yych <= 'f') - { - goto basic_json_parser_30; - } - if (yych == 'n') - { - goto basic_json_parser_30; - } - goto basic_json_parser_32; - } - else - { - if (yych <= 's') - { - if (yych <= 'r') - { - goto 
basic_json_parser_30; - } - goto basic_json_parser_32; - } - else - { - if (yych <= 't') - { - goto basic_json_parser_30; - } - if (yych <= 'u') - { - goto basic_json_parser_55; - } - goto basic_json_parser_32; - } - } - } -basic_json_parser_36: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= 0x7F) - { - goto basic_json_parser_32; - } - if (yych <= 0xBF) - { - goto basic_json_parser_30; - } - goto basic_json_parser_32; -basic_json_parser_37: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= 0x9F) - { - goto basic_json_parser_32; - } - if (yych <= 0xBF) - { - goto basic_json_parser_36; - } - goto basic_json_parser_32; -basic_json_parser_38: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= 0x7F) - { - goto basic_json_parser_32; - } - if (yych <= 0xBF) - { - goto basic_json_parser_36; - } - goto basic_json_parser_32; -basic_json_parser_39: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= 0x7F) - { - goto basic_json_parser_32; - } - if (yych <= 0x9F) - { - goto basic_json_parser_36; - } - goto basic_json_parser_32; -basic_json_parser_40: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= 0x8F) - { - goto basic_json_parser_32; - } - if (yych <= 0xBF) - { - goto basic_json_parser_38; - } - goto basic_json_parser_32; -basic_json_parser_41: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= 0x7F) - { - goto basic_json_parser_32; - } - if (yych <= 0xBF) - { - goto basic_json_parser_38; - } - goto basic_json_parser_32; -basic_json_parser_42: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych 
= *m_cursor; - if (yych <= 0x7F) - { - goto basic_json_parser_32; - } - if (yych <= 0x8F) - { - goto basic_json_parser_38; - } - goto basic_json_parser_32; -basic_json_parser_43: - yyaccept = 2; - yych = *(m_marker = ++m_cursor); - if (yych <= '9') - { - if (yych == '.') - { - goto basic_json_parser_47; - } - if (yych >= '0') - { - goto basic_json_parser_48; - } - } - else - { - if (yych <= 'E') - { - if (yych >= 'E') - { - goto basic_json_parser_51; - } - } - else - { - if (yych == 'e') - { - goto basic_json_parser_51; - } - } - } -basic_json_parser_44: - { - last_token_type = token_type::value_integer; - break; - } -basic_json_parser_45: - yyaccept = 2; - m_marker = ++m_cursor; - if ((m_limit - m_cursor) < 3) - { - fill_line_buffer(3); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= '9') - { - if (yych == '.') - { - goto basic_json_parser_47; - } - if (yych <= '/') - { - goto basic_json_parser_44; - } - goto basic_json_parser_45; - } - else - { - if (yych <= 'E') - { - if (yych <= 'D') - { - goto basic_json_parser_44; - } - goto basic_json_parser_51; - } - else - { - if (yych == 'e') - { - goto basic_json_parser_51; - } - goto basic_json_parser_44; - } - } -basic_json_parser_47: - yych = *++m_cursor; - if (yych <= '/') - { - goto basic_json_parser_32; - } - if (yych <= '9') - { - goto basic_json_parser_56; - } - goto basic_json_parser_32; -basic_json_parser_48: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= '/') - { - goto basic_json_parser_50; - } - if (yych <= '9') - { - goto basic_json_parser_48; - } -basic_json_parser_50: - { - last_token_type = token_type::parse_error; - break; - } -basic_json_parser_51: - yych = *++m_cursor; - if (yych <= ',') - { - if (yych == '+') - { - goto basic_json_parser_59; - } - goto basic_json_parser_32; - } - else - { - if (yych <= '-') - { - goto basic_json_parser_59; - } - if (yych <= '/') - { - goto basic_json_parser_32; - } - if (yych <= '9') 
- { - goto basic_json_parser_60; - } - goto basic_json_parser_32; - } -basic_json_parser_52: - yych = *++m_cursor; - if (yych == 'l') - { - goto basic_json_parser_62; - } - goto basic_json_parser_32; -basic_json_parser_53: - yych = *++m_cursor; - if (yych == 'l') - { - goto basic_json_parser_63; - } - goto basic_json_parser_32; -basic_json_parser_54: - yych = *++m_cursor; - if (yych == 'u') - { - goto basic_json_parser_64; - } - goto basic_json_parser_32; -basic_json_parser_55: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= '@') - { - if (yych <= '/') - { - goto basic_json_parser_32; - } - if (yych <= '9') - { - goto basic_json_parser_65; - } - goto basic_json_parser_32; - } - else - { - if (yych <= 'F') - { - goto basic_json_parser_65; - } - if (yych <= '`') - { - goto basic_json_parser_32; - } - if (yych <= 'f') - { - goto basic_json_parser_65; - } - goto basic_json_parser_32; - } -basic_json_parser_56: - yyaccept = 3; - m_marker = ++m_cursor; - if ((m_limit - m_cursor) < 3) - { - fill_line_buffer(3); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= 'D') - { - if (yych <= '/') - { - goto basic_json_parser_58; - } - if (yych <= '9') - { - goto basic_json_parser_56; - } - } - else - { - if (yych <= 'E') - { - goto basic_json_parser_51; - } - if (yych == 'e') - { - goto basic_json_parser_51; - } - } -basic_json_parser_58: - { - last_token_type = token_type::value_float; - break; - } -basic_json_parser_59: - yych = *++m_cursor; - if (yych <= '/') - { - goto basic_json_parser_32; - } - if (yych >= ':') - { - goto basic_json_parser_32; - } -basic_json_parser_60: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= '/') - { - goto basic_json_parser_58; - } - if (yych <= '9') - { - goto basic_json_parser_60; - } - goto basic_json_parser_58; -basic_json_parser_62: - yych = *++m_cursor; - if (yych == 's') - { - goto 
basic_json_parser_66; - } - goto basic_json_parser_32; -basic_json_parser_63: - yych = *++m_cursor; - if (yych == 'l') - { - goto basic_json_parser_67; - } - goto basic_json_parser_32; -basic_json_parser_64: - yych = *++m_cursor; - if (yych == 'e') - { - goto basic_json_parser_69; - } - goto basic_json_parser_32; -basic_json_parser_65: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= '@') - { - if (yych <= '/') - { - goto basic_json_parser_32; - } - if (yych <= '9') - { - goto basic_json_parser_71; - } - goto basic_json_parser_32; - } - else - { - if (yych <= 'F') - { - goto basic_json_parser_71; - } - if (yych <= '`') - { - goto basic_json_parser_32; - } - if (yych <= 'f') - { - goto basic_json_parser_71; - } - goto basic_json_parser_32; - } -basic_json_parser_66: - yych = *++m_cursor; - if (yych == 'e') - { - goto basic_json_parser_72; - } - goto basic_json_parser_32; -basic_json_parser_67: - ++m_cursor; - { - last_token_type = token_type::literal_null; - break; - } -basic_json_parser_69: - ++m_cursor; - { - last_token_type = token_type::literal_true; - break; - } -basic_json_parser_71: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= '@') - { - if (yych <= '/') - { - goto basic_json_parser_32; - } - if (yych <= '9') - { - goto basic_json_parser_74; - } - goto basic_json_parser_32; - } - else - { - if (yych <= 'F') - { - goto basic_json_parser_74; - } - if (yych <= '`') - { - goto basic_json_parser_32; - } - if (yych <= 'f') - { - goto basic_json_parser_74; - } - goto basic_json_parser_32; - } -basic_json_parser_72: - ++m_cursor; - { - last_token_type = token_type::literal_false; - break; - } -basic_json_parser_74: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= '@') - { - if (yych <= '/') - { - goto basic_json_parser_32; - } - if (yych <= 
'9') - { - goto basic_json_parser_30; - } - goto basic_json_parser_32; - } - else - { - if (yych <= 'F') - { - goto basic_json_parser_30; - } - if (yych <= '`') - { - goto basic_json_parser_32; - } - if (yych <= 'f') - { - goto basic_json_parser_30; - } - goto basic_json_parser_32; - } - } - - } - - position += static_cast((m_cursor - m_start)); - return last_token_type; + delete ia; } - /*! - @brief append data from the stream to the line buffer + // switch off unwanted functions (due to pointer members) + lexer() = delete; + lexer(const lexer&) = delete; + lexer operator=(const lexer&) = delete; - This function is called by the scan() function when the end of the - buffer (`m_limit`) is reached and the `m_cursor` pointer cannot be - incremented without leaving the limits of the line buffer. Note re2c - decides when to call this function. + private: + ///////////////////// + // scan functions + ///////////////////// - If the lexer reads from contiguous storage, there is no trailing null - byte. Therefore, this function must make sure to add these padding - null bytes. - - If the lexer reads from an input stream, this function reads the next - line of the input. - - @pre - p p p p p p u u u u u x . . . . . . - ^ ^ ^ ^ - m_content m_start | m_limit - m_cursor - - @post - u u u u u x x x x x x x . . . . . . 
- ^ ^ ^ - | m_cursor m_limit - m_start - m_content - */ - void fill_line_buffer(size_t n = 0) + // must be called after \u was read; returns following xxxx as hex or -1 when error + int get_codepoint() { - // if line buffer is used, m_content points to its data - assert(m_line_buffer.empty() - or m_content == reinterpret_cast(m_line_buffer.data())); - - // if line buffer is used, m_limit is set past the end of its data - assert(m_line_buffer.empty() - or m_limit == m_content + m_line_buffer.size()); - - // pointer relationships - assert(m_content <= m_start); - assert(m_start <= m_cursor); - assert(m_cursor <= m_limit); - assert(m_marker == nullptr or m_marker <= m_limit); - - // number of processed characters (p) - const auto num_processed_chars = static_cast(m_start - m_content); - // offset for m_marker wrt. to m_start - const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start; - // number of unprocessed characters (u) - const auto offset_cursor = m_cursor - m_start; - - // no stream is used or end of file is reached - if (m_stream == nullptr or m_stream->eof()) + // read xxxx of \uxxxx + std::vector buffer(5, '\0'); + for (size_t i = 0; i < 4; ++i) { - // m_start may or may not be pointing into m_line_buffer at - // this point. We trust the standard library to do the right - // thing. 
See http://stackoverflow.com/q/28142011/266378 - m_line_buffer.assign(m_start, m_limit); - - // append n characters to make sure that there is sufficient - // space between m_cursor and m_limit - m_line_buffer.append(1, '\x00'); - if (n > 0) + get(); + if (JSON_UNLIKELY(current != std::char_traits::eof())) { - m_line_buffer.append(n - 1, '\x01'); - } - } - else - { - // delete processed characters from line buffer - m_line_buffer.erase(0, num_processed_chars); - // read next line from input stream - m_line_buffer_tmp.clear(); - - // check if stream is still good - if (m_stream->fail()) - { - JSON_THROW(parse_error(111, 0, "bad input stream")); - } - - std::getline(*m_stream, m_line_buffer_tmp, '\n'); - - // add line with newline symbol to the line buffer - m_line_buffer += m_line_buffer_tmp; - m_line_buffer.push_back('\n'); - } - - // set pointers - m_content = reinterpret_cast(m_line_buffer.data()); - assert(m_content != nullptr); - m_start = m_content; - m_marker = m_start + offset_marker; - m_cursor = m_start + offset_cursor; - m_limit = m_start + m_line_buffer.size(); - } - - /// return string representation of last read token - string_t get_token_string() const - { - assert(m_start != nullptr); - return string_t(reinterpret_cast(m_start), - static_cast(m_cursor - m_start)); - } - - /*! - @brief return string value for string tokens - - The function iterates the characters between the opening and closing - quotes of the string value. The complete string is the range - [m_start,m_cursor). Consequently, we iterate from m_start+1 to - m_cursor-1. - - We differentiate two cases: - - 1. Escaped characters. In this case, a new character is constructed - according to the nature of the escape. Some escapes create new - characters (e.g., `"\\n"` is replaced by `"\n"`), some are copied - as is (e.g., `"\\\\"`). Furthermore, Unicode escapes of the shape - `"\\uxxxx"` need special care. In this case, to_unicode takes care - of the construction of the values. - 2. 
Unescaped characters are copied as is. - - @pre `m_cursor - m_start >= 2`, meaning the length of the last token - is at least 2 bytes which is trivially true for any string (which - consists of at least two quotes). - - " c1 c2 c3 ... " - ^ ^ - m_start m_cursor - - @complexity Linear in the length of the string.\n - - Lemma: The loop body will always terminate.\n - - Proof (by contradiction): Assume the loop body does not terminate. As - the loop body does not contain another loop, one of the called - functions must never return. The called functions are `std::strtoul` - and to_unicode. Neither function can loop forever, so the loop body - will never loop forever which contradicts the assumption that the loop - body does not terminate, q.e.d.\n - - Lemma: The loop condition for the for loop is eventually false.\n - - Proof (by contradiction): Assume the loop does not terminate. Due to - the above lemma, this can only be due to a tautological loop - condition; that is, the loop condition i < m_cursor - 1 must always be - true. Let x be the change of i for any loop iteration. Then - m_start + 1 + x < m_cursor - 1 must hold to loop indefinitely. This - can be rephrased to m_cursor - m_start - 2 > x. With the - precondition, we x <= 0, meaning that the loop condition holds - indefinitely if i is always decreased. However, observe that the value - of i is strictly increasing with each iteration, as it is incremented - by 1 in the iteration expression and never decremented inside the loop - body. Hence, the loop condition will eventually be false which - contradicts the assumption that the loop condition is a tautology, - q.e.d. 
- - @return string value of current token without opening and closing - quotes - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - */ - string_t get_string() const - { - assert(m_cursor - m_start >= 2); - - string_t result; - result.reserve(static_cast(m_cursor - m_start - 2)); - - // iterate the result between the quotes - for (const lexer_char_t* i = m_start + 1; i < m_cursor - 1; ++i) - { - // find next escape character - auto e = std::find(i, m_cursor - 1, '\\'); - if (e != i) - { - // see https://github.com/nlohmann/json/issues/365#issuecomment-262874705 - for (auto k = i; k < e; k++) - { - result.push_back(static_cast(*k)); - } - i = e - 1; // -1 because of ++i + buffer[i] = static_cast(current); } else { - // processing escaped character - // read next character - ++i; - - switch (*i) - { - // the default escapes - case 't': - { - result += "\t"; - break; - } - case 'b': - { - result += "\b"; - break; - } - case 'f': - { - result += "\f"; - break; - } - case 'n': - { - result += "\n"; - break; - } - case 'r': - { - result += "\r"; - break; - } - case '\\': - { - result += "\\"; - break; - } - case '/': - { - result += "/"; - break; - } - case '"': - { - result += "\""; - break; - } - - // unicode - case 'u': - { - // get code xxxx from uxxxx - auto codepoint = std::strtoul(std::string(reinterpret_cast(i + 1), - 4).c_str(), nullptr, 16); - - // check if codepoint is a high surrogate - if (codepoint >= 0xD800 and codepoint <= 0xDBFF) - { - // make sure there is a subsequent unicode - if ((i + 6 >= m_limit) or * (i + 5) != '\\' or * (i + 6) != 'u') - { - JSON_THROW(parse_error(102, get_position(), "missing low surrogate")); - } - - // get code yyyy from uxxxx\uyyyy - auto codepoint2 = std::strtoul(std::string(reinterpret_cast - (i + 7), 4).c_str(), nullptr, 16); - result += to_unicode(codepoint, codepoint2); - // skip the next 10 characters (xxxx\uyyyy) - i += 10; - } - else if (codepoint >= 0xDC00 and 
codepoint <= 0xDFFF) - { - // we found a lone low surrogate - JSON_THROW(parse_error(102, get_position(), "missing high surrogate")); - } - else - { - // add unicode character(s) - result += to_unicode(codepoint); - // skip the next four characters (xxxx) - i += 4; - } - break; - } - } + // error message will be created by caller + return -1; } } - return result; + char* endptr; + errno = 0; + + const int codepoint = static_cast(std::strtoul(buffer.data(), &endptr, 16)); + + if (JSON_LIKELY(errno == 0 and endptr == buffer.data() + 4)) + { + return codepoint; + } + else + { + // conversion incomplete or failure + return -1; + } } - - /*! - @brief parse string into a built-in arithmetic type as if the current - locale is POSIX. - - @note in floating-point case strtod may parse past the token's end - - this is not an error - - @note any leading blanks are not handled - */ - struct strtonum + token_type scan_string() { - public: - strtonum(const char* start, const char* end) - : m_start(start), m_end(end) - {} + // reset yytext (ignore opening quote) + reset(); - /*! 
- @return true iff parsed successfully as number of type T - - @param[in,out] val shall contain parsed value, or undefined value - if could not parse - */ - template::value>::type> - bool to(T& val) const + while (true) { - return parse(val, std::is_integral()); - } + get(); - private: - const char* const m_start = nullptr; - const char* const m_end = nullptr; - - // floating-point conversion - - // overloaded wrappers for strtod/strtof/strtold - // that will be called from parse - static void strtof(float& f, const char* str, char** endptr) - { - f = std::strtof(str, endptr); - } - - static void strtof(double& f, const char* str, char** endptr) - { - f = std::strtod(str, endptr); - } - - static void strtof(long double& f, const char* str, char** endptr) - { - f = std::strtold(str, endptr); - } - - template - bool parse(T& value, /*is_integral=*/std::false_type) const - { - // replace decimal separator with locale-specific version, - // when necessary; data will point to either the original - // string, or buf, or tempstr containing the fixed string. - std::string tempstr; - std::array buf; - const size_t len = static_cast(m_end - m_start); - - // lexer will reject empty numbers - assert(len > 0); - - // since dealing with strtod family of functions, we're - // getting the decimal point char from the C locale facilities - // instead of C++'s numpunct facet of the current std::locale - const auto loc = localeconv(); - assert(loc != nullptr); - const char decimal_point_char = (loc->decimal_point == nullptr) ? '.' 
: loc->decimal_point[0]; - - const char* data = m_start; - - if (decimal_point_char != '.') + // end of file while parsing string + if (JSON_UNLIKELY(current == std::char_traits::eof())) { - const size_t ds_pos = static_cast(std::find(m_start, m_end, '.') - m_start); + error_message = "invalid string: missing closing quote"; + return token_type::parse_error; + } - if (ds_pos != len) + // control character + if (JSON_UNLIKELY('\x00' <= current and current <= '\x1f')) + { + error_message = "invalid string: control characters (U+0000 through U+001f) must be escaped"; + return token_type::parse_error; + } + + switch (current) + { + // closing quote + case '\"': { - // copy the data into the local buffer or tempstr, if - // buffer is too small; replace decimal separator, and - // update data to point to the modified bytes - if ((len + 1) < buf.size()) + add('\0'); + --yylen; + return token_type::value_string; + } + + // escape sequence + case '\\': + { + switch (get()) { - std::copy(m_start, m_end, buf.begin()); - buf[len] = 0; - buf[ds_pos] = decimal_point_char; - data = buf.data(); - } - else - { - tempstr.assign(m_start, m_end); - tempstr[ds_pos] = decimal_point_char; - data = tempstr.c_str(); + // quotation mark + case '\"': + add('\"'); + break; + // reverse solidus + case '\\': + add('\\'); + break; + // solidus + case '/': + add('/'); + break; + // backspace + case 'b': + add('\b'); + break; + // form feed + case 'f': + add('\f'); + break; + // line feed + case 'n': + add('\n'); + break; + // carriage return + case 'r': + add('\r'); + break; + // tab + case 't': + add('\t'); + break; + + // unicode escapes + case 'u': + { + int codepoint; + int codepoint1 = get_codepoint(); + + if (JSON_UNLIKELY(codepoint1 == -1)) + { + error_message = "invalid string: '\\u' must be followed by 4 hex digits"; + return token_type::parse_error; + } + + // check if code point is a high surrogate + if (codepoint1 >= 0xD800 and codepoint1 <= 0xDBFF) + { + // expect next \uxxxx entry + 
if (JSON_LIKELY(get() == '\\' and get() == 'u')) + { + int codepoint2 = get_codepoint(); + + if (JSON_UNLIKELY(codepoint2 == -1)) + { + error_message = "invalid string: '\\u' must be followed by 4 hex digits"; + return token_type::parse_error; + } + + // check if codepoint2 is a low surrogate + if (codepoint2 >= 0xDC00 and codepoint2 <= 0xDFFF) + { + codepoint = + // high surrogate occupies the most significant 22 bits + (codepoint1 << 10) + // low surrogate occupies the least significant 15 bits + + codepoint2 + // there is still the 0xD800, 0xDC00 and 0x10000 noise + // in the result so we have to subtract with: + // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00 + - 0x35FDC00; + } + else + { + error_message = "invalid string: invalid low surrogate"; + return token_type::parse_error; + } + } + else + { + error_message = "invalid string: missing low surrogate"; + return token_type::parse_error; + } + } + else + { + if (JSON_UNLIKELY(codepoint1 >= 0xDC00 and codepoint1 <= 0xDFFF)) + { + error_message = "invalid string: missing high surrogate"; + return token_type::parse_error; + } + + // only work with first code point + codepoint = codepoint1; + } + + // translate code point to bytes + if (codepoint < 0x80) + { + // 1-byte characters: 0xxxxxxx (ASCII) + add(codepoint); + } + else if (codepoint <= 0x7ff) + { + // 2-byte characters: 110xxxxx 10xxxxxx + add(0xC0 | (codepoint >> 6)); + add(0x80 | (codepoint & 0x3F)); + } + else if (codepoint <= 0xffff) + { + // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx + add(0xE0 | (codepoint >> 12)); + add(0x80 | ((codepoint >> 6) & 0x3F)); + add(0x80 | (codepoint & 0x3F)); + } + else if (codepoint <= 0x10ffff) + { + // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + add(0xF0 | (codepoint >> 18)); + add(0x80 | ((codepoint >> 12) & 0x3F)); + add(0x80 | ((codepoint >> 6) & 0x3F)); + add(0x80 | (codepoint & 0x3F)); + } + else + { + error_message = "invalid string: code points above U+10FFFF are invalid"; + return 
token_type::parse_error; + } + + break; + } + + // other characters after escape + default: + error_message = "invalid string: forbidden character after backspace"; + return token_type::parse_error; } + + break; + } + + // any other character + default: + { + add(current); + break; } } + } + } - char* endptr = nullptr; - value = 0; - // this calls appropriate overload depending on T - strtof(value, data, &endptr); + token_type scan_number() + { + static unsigned char lookup[9][256] = + { + {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 1, 10, 10, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10}, + {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10}, + {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}, + {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}, + {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10}, + {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 8, 10, 8, 10, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10}, + {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}, + {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}, + {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10} + }; - // parsing was successful iff strtof parsed exactly the number - // of characters determined by the lexer (len) - const bool ok = (endptr == (data + len)); + reset(); - if (ok and (value == static_cast(0.0)) and (*data == '-')) + bool has_sign = false; + bool has_exp = false; + bool has_point = false; + + int state = lookup[0][static_cast(current)]; + int old_state = 0; + + while (state != 9) + { + has_sign = has_sign or (state == 1); + has_point = has_point or (state == 4); + has_exp = has_exp or (state == 5); + + if (JSON_UNLIKELY(state == 10)) { - // some implementations forget to negate the zero - value = -0.0; + // create error message based on previous state + switch (old_state) + { + case 0: + error_message = "invalid number; expected '-' or digit"; + break; + case 1: + error_message = "invalid number; expected digit after '-'"; + break; + case 4: + error_message = "invalid number; expected digit after '.'"; + break; + case 5: + error_message = "invalid number; expected '+', '-', or digit after exponent"; + break; + case 8: + error_message = "invalid number; expected digit after exponent sign"; + break; + default: + assert(false); // no error in the other states + break; + } + return token_type::parse_error; } - return ok; + add(current); + get(); + old_state = state; + state = 
lookup[state][static_cast(current)]; } - // integral conversion + // unget the character after the number + unget(); - signed long long parse_integral(char** endptr, /*is_signed*/std::true_type) const + // terminate token + add('\0'); + --yylen; + + if (has_exp or has_point) { - return std::strtoll(m_start, endptr, 10); + value_float = std::strtod(yytext.data(), nullptr); + return token_type::value_float; } - unsigned long long parse_integral(char** endptr, /*is_signed*/std::false_type) const - { - return std::strtoull(m_start, endptr, 10); - } - - template - bool parse(T& value, /*is_integral=*/std::true_type) const + if (has_sign) { char* endptr = nullptr; - errno = 0; // these are thread-local - const auto x = parse_integral(&endptr, std::is_signed()); - - // called right overload? - static_assert(std::is_signed() == std::is_signed(), ""); - - value = static_cast(x); - - return (x == static_cast(value)) // x fits into destination T - and (x < 0) == (value < 0) // preserved sign - //and ((x != 0) or is_integral()) // strto[u]ll did nto fail - and (errno == 0) // strto[u]ll did not overflow - and (m_start < m_end) // token was not empty - and (endptr == m_end); // parsed entire token exactly + value_integer = std::strtoll(yytext.data(), &endptr, 10); + return token_type::value_integer; } - }; + else + { + char* endptr = nullptr; + value_unsigned = std::strtoull(yytext.data(), &endptr, 10); + return token_type::value_unsigned; + } + } - /*! - @brief return number value for number tokens + token_type scan_true() + { + if (JSON_LIKELY((get() == 'r' and get() == 'u' and get() == 'e'))) + { + return token_type::literal_true; + } - This function translates the last token into the most appropriate - number type (either integer, unsigned integer or floating point), - which is passed back to the caller via the result parameter. 
+ error_message = "invalid literal; expected 'true'"; + return token_type::parse_error; + } - integral numbers that don't fit into the the range of the respective - type are parsed as number_float_t + token_type scan_false() + { + if (JSON_LIKELY((get() == 'a' and get() == 'l' and get() == 's' and get() == 'e'))) + { + return token_type::literal_false; + } - floating-point values do not satisfy std::isfinite predicate - are converted to value_t::null + error_message = "invalid literal; expected 'false'"; + return token_type::parse_error; + } - throws if the entire string [m_start .. m_cursor) cannot be - interpreted as a number + token_type scan_null() + { + if (JSON_LIKELY((get() == 'u' and get() == 'l' and get() == 'l'))) + { + return token_type::literal_null; + } + + error_message = "invalid literal; expected 'null'"; + return token_type::parse_error; + } + + ///////////////////// + // input management + ///////////////////// + + void reset() + { + yylen = 0; + start_pos = chars_read - 1; + } + + // get a character from the input + int get() + { + ++chars_read; + + if (JSON_UNLIKELY(next_unget)) + { + next_unget = false; + } + else + { + current = ia->get_character(); + } + + return current; + } + + // unget a character to the input + void unget() + { + --chars_read; + next_unget = true; + } + + // add a character to yytext + void add(int c) + { + // resize yytext if necessary + if (JSON_UNLIKELY((yylen + 1 > yytext.capacity()))) + { + yytext.resize(2 * yytext.capacity(), '\0'); + } + yytext[yylen++] = static_cast(c); + } + + public: + constexpr size_t get_position() const + { + return chars_read; + } + + const std::string get_string() + { + return std::string(yytext.data(), yylen); + } + + std::string get_token_string() const + { + std::string s = ia->read(start_pos, chars_read - start_pos); + std::stringstream ss; + + for (auto c : s) + { + if (c == '\0' or c == std::char_traits::eof()) + { + continue; + } + else if ('\x00' <= c and c <= '\x1f') + { + ss << 
""; + } + else + { + ss << c; + } + } + + return ss.str(); + } + + const std::string& get_error_message() const + { + return error_message; + } - @param[out] result @ref basic_json object to receive the number. - @param[in] token the type of the number token - */ bool get_number(basic_json& result, const token_type token) const { - assert(m_start != nullptr); - assert(m_start < m_cursor); - assert((token == token_type::value_unsigned) or - (token == token_type::value_integer) or - (token == token_type::value_float)); - - strtonum num_converter(reinterpret_cast(m_start), - reinterpret_cast(m_cursor)); - switch (token) { case lexer::token_type::value_unsigned: { - number_unsigned_t val; - if (num_converter.to(val)) - { - // parsing successful - result.m_type = value_t::number_unsigned; - result.m_value = val; - return true; - } - break; + result.m_type = value_t::number_unsigned; + result.m_value = static_cast(value_unsigned); + return true; } case lexer::token_type::value_integer: { - number_integer_t val; - if (num_converter.to(val)) - { - // parsing successful - result.m_type = value_t::number_integer; - result.m_value = val; - return true; - } - break; + result.m_type = value_t::number_integer; + result.m_value = static_cast(value_integer); + return true; + } + + case lexer::token_type::value_float: + { + result.m_type = value_t::number_float; + result.m_value = static_cast(value_float); + return true; } default: { - break; + return false; } } - - // parse float (either explicitly or because a previous conversion - // failed) - number_float_t val; - if (num_converter.to(val)) - { - // parsing successful - result.m_type = value_t::number_float; - result.m_value = val; - - // throw in case of infinity or NAN - if (not std::isfinite(result.m_value.number_float)) - { - JSON_THROW(out_of_range(406, "number overflow parsing '" + get_token_string() + "'")); - } - - return true; - } - - // couldn't parse number in any format - return false; } - constexpr size_t 
get_position() const + token_type scan() { - return position; + // read next character and ignore whitespace + do + { + get(); + } + while (current == ' ' or current == '\t' or current == '\n' or current == '\r'); + + switch (current) + { + // structural characters + case '[': + return token_type::begin_array; + case ']': + return token_type::end_array; + case '{': + return token_type::begin_object; + case '}': + return token_type::end_object; + case ':': + return token_type::name_separator; + case ',': + return token_type::value_separator; + + // literals + case 't': + return scan_true(); + case 'f': + return scan_false(); + case 'n': + return scan_null(); + + // string + case '\"': + return scan_string(); + + // number + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return scan_number(); + + // end of input + case '\0': + case std::char_traits::eof(): + return token_type::end_of_input; + + // error + default: + error_message = "invalid literal"; + return token_type::parse_error; + } } private: - /// optional input stream - std::istream* m_stream = nullptr; - /// line buffer buffer for m_stream - string_t m_line_buffer {}; - /// used for filling m_line_buffer - string_t m_line_buffer_tmp {}; - /// the buffer pointer - const lexer_char_t* m_content = nullptr; - /// pointer to the beginning of the current symbol - const lexer_char_t* m_start = nullptr; - /// pointer for backtracking information - const lexer_char_t* m_marker = nullptr; - /// pointer to the current symbol - const lexer_char_t* m_cursor = nullptr; - /// pointer to the end of the buffer - const lexer_char_t* m_limit = nullptr; - /// the last token type - token_type last_token_type = token_type::end_of_input; - /// current position in the input (read bytes) - size_t position = 0; + /// input adapter + input_adapter* ia = nullptr; + + /// the current character + int current = std::char_traits::eof(); + + /// whether 
get() should return the last character again + bool next_unget = false; + + /// the number of characters read + size_t chars_read = 0; + /// the start position of the current token + size_t start_pos = 0; + + /// buffer for variable-length tokens (numbers, strings) + std::vector yytext = std::vector(1024, '\0'); + /// current index in yytext + size_t yylen = 0; + + /// a description of occurred lexer errors + std::string error_message = ""; + + // number values + long long value_integer = 0; + unsigned long long value_unsigned = 0; + double value_float = 0; }; /*! @@ -12035,7 +11062,7 @@ basic_json_parser_74: /// a parser reading from a string literal parser(const char* buff, const parser_callback_t cb = nullptr) : callback(cb), - m_lexer(reinterpret_cast(buff), std::strlen(buff)) + m_lexer(buff, std::strlen(buff)) {} /*! @@ -12053,7 +11080,7 @@ basic_json_parser_74: = 0> parser(IteratorType first, IteratorType last, const parser_callback_t cb = nullptr) : callback(cb), - m_lexer(reinterpret_cast(&(*first)), + m_lexer(reinterpret_cast(&(*first)), static_cast(std::distance(first, last))) {} @@ -12071,7 +11098,8 @@ basic_json_parser_74: basic_json result = parse_internal(true); result.assert_invariant(); - expect(lexer::token_type::end_of_input); + // FIXME: this is new behavior + //expect(lexer::token_type::end_of_input); // return parser result and replace it with null in case the // top-level value was discarded by the callback function @@ -12129,6 +11157,8 @@ basic_json_parser_74: // store key expect(lexer::token_type::value_string); + // FIXME get_string returns const char*; maybe we can + // avoid this copy in the future const auto key = m_lexer.get_string(); bool keep_tag = false; @@ -12235,9 +11265,8 @@ basic_json_parser_74: case lexer::token_type::value_string: { - const auto s = m_lexer.get_string(); + result = basic_json(m_lexer.get_string()); get_token(); - result = basic_json(s); break; } @@ -12294,10 +11323,16 @@ basic_json_parser_74: { if (t != 
last_token) { - std::string error_msg = "parse error - unexpected "; - error_msg += (last_token == lexer::token_type::parse_error ? ("'" + m_lexer.get_token_string() + - "'") : - lexer::token_type_name(last_token)); + std::string error_msg = "syntax error - "; + if (last_token == lexer::token_type::parse_error) + { + error_msg += m_lexer.get_error_message() + "; last read: '" + m_lexer.get_token_string() + "'"; + } + else + { + error_msg += "unexpected " + lexer::token_type_name(last_token); + } + error_msg += "; expected " + lexer::token_type_name(t); JSON_THROW(parse_error(101, m_lexer.get_position(), error_msg)); } @@ -12310,10 +11345,16 @@ basic_json_parser_74: { if (t == last_token) { - std::string error_msg = "parse error - unexpected "; - error_msg += (last_token == lexer::token_type::parse_error ? ("'" + m_lexer.get_token_string() + - "'") : - lexer::token_type_name(last_token)); + std::string error_msg = "syntax error - "; + if (last_token == lexer::token_type::parse_error) + { + error_msg += m_lexer.get_error_message() + "; last read '" + m_lexer.get_token_string() + "'"; + } + else + { + error_msg += "unexpected " + lexer::token_type_name(last_token); + } + JSON_THROW(parse_error(101, m_lexer.get_position(), error_msg)); } } @@ -13826,5 +12867,7 @@ inline nlohmann::json::json_pointer operator "" _json_pointer(const char* s, std #undef JSON_CATCH #undef JSON_THROW #undef JSON_TRY +#undef JSON_LIKELY +#undef JSON_UNLIKELY #endif diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c deleted file mode 100644 index 6a1e09e3..00000000 --- a/src/json.hpp.re2c +++ /dev/null @@ -1,12863 +0,0 @@ -/* - __ _____ _____ _____ - __| | __| | | | JSON for Modern C++ -| | |__ | | | | | | version 2.1.1 -|_____|_____|_____|_|___| https://github.com/nlohmann/json - -Licensed under the MIT License . -Copyright (c) 2013-2017 Niels Lohmann . 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
-*/ - -#ifndef NLOHMANN_JSON_HPP -#define NLOHMANN_JSON_HPP - -#include // all_of, copy, fill, find, for_each, none_of, remove, reverse, transform -#include // array -#include // assert -#include // and, not, or -#include // lconv, localeconv -#include // isfinite, labs, ldexp, signbit -#include // nullptr_t, ptrdiff_t, size_t -#include // int64_t, uint64_t -#include // abort, strtod, strtof, strtold, strtoul, strtoll, strtoull -#include // strlen -#include // forward_list -#include // function, hash, less -#include // initializer_list -#include // istream, ostream -#include // advance, begin, back_inserter, bidirectional_iterator_tag, distance, end, inserter, iterator, iterator_traits, next, random_access_iterator_tag, reverse_iterator -#include // numeric_limits -#include // locale -#include // map -#include // addressof, allocator, allocator_traits, unique_ptr -#include // accumulate -#include // stringstream -#include // getline, stoi, string, to_string -#include // add_pointer, conditional, decay, enable_if, false_type, integral_constant, is_arithmetic, is_base_of, is_const, is_constructible, is_convertible, is_default_constructible, is_enum, is_floating_point, is_integral, is_nothrow_move_assignable, is_nothrow_move_constructible, is_pointer, is_reference, is_same, is_scalar, is_signed, remove_const, remove_cv, remove_pointer, remove_reference, true_type, underlying_type -#include // declval, forward, make_pair, move, pair, swap -#include // vector - -// exclude unsupported compilers -#if defined(__clang__) - #if (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) < 30400 - #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers" - #endif -#elif defined(__GNUC__) - #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40900 - #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" - #endif -#endif - -// disable float-equal warnings on GCC/clang -#if 
defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wfloat-equal" -#endif - -// disable documentation warnings on clang -#if defined(__clang__) - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wdocumentation" -#endif - -// allow to disable exceptions -#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && not defined(JSON_NOEXCEPTION) - #define JSON_THROW(exception) throw exception - #define JSON_TRY try - #define JSON_CATCH(exception) catch(exception) -#else - #define JSON_THROW(exception) std::abort() - #define JSON_TRY if(true) - #define JSON_CATCH(exception) if(false) -#endif - -/*! -@brief namespace for Niels Lohmann -@see https://github.com/nlohmann -@since version 1.0.0 -*/ -namespace nlohmann -{ - -/*! -@brief unnamed namespace with internal helper functions - -This namespace collects some functions that could not be defined inside the -@ref basic_json class. - -@since version 2.1.0 -*/ -namespace detail -{ -//////////////// -// exceptions // -//////////////// - -/*! -@brief general exception of the @ref basic_json class - -Extension of std::exception objects with a member @a id for exception ids. - -@since version 3.0.0 -*/ -class exception : public std::exception -{ - public: - /// create exception with id an explanatory string - exception(int id_, const std::string& ename, const std::string& what_arg_) - : id(id_), - what_arg("[json.exception." + ename + "." + std::to_string(id_) + "] " + what_arg_) - {} - - /// returns the explanatory string - virtual const char* what() const noexcept override - { - return what_arg.c_str(); - } - - /// the id of the exception - const int id; - - private: - /// the explanatory string - const std::string what_arg; -}; - -/*! -@brief exception indicating a parse error - -This excpetion is thrown by the library when a parse error occurs. 
Parse -errors can occur during the deserialization of JSON text as well as when -using JSON Patch. - -Member @a byte holds the byte index of the last read character in the input -file. - -@note For an input with n bytes, 1 is the index of the first character - and n+1 is the index of the terminating null byte or the end of - file. This also holds true when reading a byte vector (CBOR or - MessagePack). - -Exceptions have ids 1xx. - -name / id | example massage | description ------------------------------- | --------------- | ------------------------- -json.exception.parse_error.101 | parse error at 2: unexpected end of input; expected string literal | This error indicates a syntax error while deserializing a JSON text. The error message describes that an unexpected token (character) was encountered, and the member @a byte indicates the error position. -json.exception.parse_error.102 | parse error at 14: missing or wrong low surrogate | JSON uses the `\uxxxx` format to describe Unicode characters. Code points above above 0xFFFF are split into two `\uxxxx` entries ("surrogate pairs"). This error indicates that the surrogate pair is incomplete or contains an invalid code point. -json.exception.parse_error.103 | parse error: code points above 0x10FFFF are invalid | Unicode supports code points up to 0x10FFFF. Code points above 0x10FFFF are invalid. -json.exception.parse_error.104 | parse error: JSON patch must be an array of objects | [RFC 6902](https://tools.ietf.org/html/rfc6902) requires a JSON Patch document to be a JSON document that represents an array of objects. -json.exception.parse_error.105 | parse error: operation must have string member 'op' | An operation of a JSON Patch document must contain exactly one "op" member, whose value indicates the operation to perform. Its value must be one of "add", "remove", "replace", "move", "copy", or "test"; other values are errors. 
-json.exception.parse_error.106 | parse error: array index '01' must not begin with '0' | An array index in a JSON Pointer ([RFC 6901](https://tools.ietf.org/html/rfc6901)) may be `0` or any number wihtout a leading `0`. -json.exception.parse_error.107 | parse error: JSON pointer must be empty or begin with '/' - was: 'foo' | A JSON Pointer must be a Unicode string containing a sequence of zero or more reference tokens, each prefixed by a `/` character. -json.exception.parse_error.108 | parse error: escape character '~' must be followed with '0' or '1' | In a JSON Pointer, only `~0` and `~1` are valid escape sequences. -json.exception.parse_error.109 | parse error: array index 'one' is not a number | A JSON Pointer array index must be a number. -json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vector | When parsing CBOR or MessagePack, the byte vector ends before the complete value has been read. -json.exception.parse_error.111 | parse error: bad input stream | Parsing CBOR or MessagePack from an input stream where the [`badbit` or `failbit`](http://en.cppreference.com/w/cpp/io/ios_base/iostate) is set. -json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xf8 | Not all types of CBOR or MessagePack are supported. This exception occurs if an unsupported byte was read. -json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read. - -@since version 3.0.0 -*/ -class parse_error : public exception -{ - public: - /*! - @brief create a parse error exception - @param[in] id_ the id of the exception - @param[in] byte_ the byte index where the error occured (or 0 if - the position cannot be determined) - @param[in] what_arg_ the explanatory string - */ - parse_error(int id_, size_t byte_, const std::string& what_arg_) - : exception(id_, "parse_error", "parse error" + - (byte_ != 0 ? 
(" at " + std::to_string(byte_)) : "") + - ": " + what_arg_), - byte(byte_) - {} - - /*! - @brief byte index of the parse error - - The byte index of the last read character in the input file. - - @note For an input with n bytes, 1 is the index of the first character - and n+1 is the index of the terminating null byte or the end of - file. This also holds true when reading a byte vector (CBOR or - MessagePack). - */ - const size_t byte; -}; - -/*! -@brief exception indicating errors with iterators - -Exceptions have ids 2xx. - -name / id | example massage | description ------------------------------------ | --------------- | ------------------------- -json.exception.invalid_iterator.201 | iterators are not compatible | The iterators passed to constructor @ref basic_json(InputIT first, InputIT last) are not compatible, meaning they do not belong to the same container. Therefore, the range (@a first, @a last) is invalid. -json.exception.invalid_iterator.202 | iterator does not fit current value | In an erase or insert function, the passed iterator @a pos does not belong to the JSON value for which the function was called. It hence does not define a valid position for the deletion/insertion. -json.exception.invalid_iterator.203 | iterators do not fit current value | Either iterator passed to function @ref erase(IteratorType first, IteratorType last) does not belong to the JSON value from which values shall be erased. It hence does not define a valid range to delete values from. -json.exception.invalid_iterator.204 | iterators out of range | When an iterator range for a primitive type (number, boolean, or string) is passed to a constructor or an erase function, this range has to be exactly (@ref begin(), @ref end()), because this is the only way the single stored value is expressed. All other ranges are invalid. 
-json.exception.invalid_iterator.205 | iterator out of range | When an iterator for a primitive type (number, boolean, or string) is passed to an erase function, the iterator has to be the @ref begin() iterator, because it is the only way to address the stored value. All other iterators are invalid. -json.exception.invalid_iterator.206 | cannot construct with iterators from null | The iterators passed to constructor @ref basic_json(InputIT first, InputIT last) belong to a JSON null value and hence to not define a valid range. -json.exception.invalid_iterator.207 | cannot use key() for non-object iterators | The key() member function can only be used on iterators belonging to a JSON object, because other types do not have a concept of a key. -json.exception.invalid_iterator.208 | cannot use operator[] for object iterators | The operator[] to specify a concrete offset cannot be used on iterators belonging to a JSON object, because JSON objects are unordered. -json.exception.invalid_iterator.209 | cannot use offsets with object iterators | The offset operators (+, -, +=, -=) cannot be used on iterators belonging to a JSON object, because JSON objects are unordered. -json.exception.invalid_iterator.210 | iterators do not fit | The iterator range passed to the insert function are not compatible, meaning they do not belong to the same container. Therefore, the range (@a first, @a last) is invalid. -json.exception.invalid_iterator.211 | passed iterators may not belong to container | The iterator range passed to the insert function must not be a subrange of the container to insert to. -json.exception.invalid_iterator.212 | cannot compare iterators of different containers | When two iterators are compared, they must belong to the same container. -json.exception.invalid_iterator.213 | cannot compare order of object iterators | The order of object iterators cannot be compated, because JSON objects are unordered. 
-json.exception.invalid_iterator.214 | cannot get value | Cannot get value for iterator: Either the iterator belongs to a null value or it is an iterator to a primitive type (number, boolean, or string), but the iterator is different to @ref begin(). - -@since version 3.0.0 -*/ -class invalid_iterator : public exception -{ - public: - invalid_iterator(int id_, const std::string& what_arg_) - : exception(id_, "invalid_iterator", what_arg_) - {} -}; - -/*! -@brief exception indicating executing a member function with a wrong type - -Exceptions have ids 3xx. - -name / id | example massage | description ------------------------------ | --------------- | ------------------------- -json.exception.type_error.301 | cannot create object from initializer list | To create an object from an initializer list, the initializer list must consist only of a list of pairs whose first element is a string. When this constraint is violated, an array is created instead. -json.exception.type_error.302 | type must be object, but is array | During implicit or explicit value conversion, the JSON type must be compatible to the target type. For instance, a JSON string can only be converted into string types, but not into numbers or boolean types. -json.exception.type_error.303 | incompatible ReferenceType for get_ref, actual type is object | To retrieve a reference to a value stored in a @ref basic_json object with @ref get_ref, the type of the reference must match the value type. For instance, for a JSON array, the @a ReferenceType must be @ref array_t&. -json.exception.type_error.304 | cannot use at() with string | The @ref at() member functions can only be executed for certain JSON types. -json.exception.type_error.305 | cannot use operator[] with string | The @ref operator[] member functions can only be executed for certain JSON types. -json.exception.type_error.306 | cannot use value() with string | The @ref value() member functions can only be executed for certain JSON types. 
-json.exception.type_error.307 | cannot use erase() with string | The @ref erase() member functions can only be executed for certain JSON types. -json.exception.type_error.308 | cannot use push_back() with string | The @ref push_back() and @ref operator+= member functions can only be executed for certain JSON types. -json.exception.type_error.309 | cannot use insert() with | The @ref insert() member functions can only be executed for certain JSON types. -json.exception.type_error.310 | cannot use swap() with number | The @ref swap() member functions can only be executed for certain JSON types. -json.exception.type_error.311 | cannot use emplace_back() with string | The @ref emplace_back() member function can only be executed for certain JSON types. -json.exception.type_error.313 | invalid value to unflatten | The @ref unflatten function converts an object whose keys are JSON Pointers back into an arbitrary nested JSON value. The JSON Pointers must not overlap, because then the resulting value would not be well defined. -json.exception.type_error.314 | only objects can be unflattened | The @ref unflatten function only works for an object whose keys are JSON Pointers. -json.exception.type_error.315 | values in object must be primitive | The @ref unflatten function only works for an object whose keys are JSON Pointers and whose values are primitive. - -@since version 3.0.0 -*/ -class type_error : public exception -{ - public: - type_error(int id_, const std::string& what_arg_) - : exception(id_, "type_error", what_arg_) - {} -}; - -/*! -@brief exception indicating access out of the defined range - -Exceptions have ids 4xx. - -name / id | example massage | description -------------------------------- | --------------- | ------------------------- -json.exception.out_of_range.401 | array index 3 is out of range | The provided array index @a i is larger than @a size-1. 
-json.exception.out_of_range.402 | array index '-' (3) is out of range | The special array index `-` in a JSON Pointer never describes a valid element of the array, but the index past the end. That is, it can only be used to add elements at this position, but not to read it. -json.exception.out_of_range.403 | key 'foo' not found | The provided key was not found in the JSON object. -json.exception.out_of_range.404 | unresolved reference token 'foo' | A reference token in a JSON Pointer could not be resolved. -json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch operations 'remove' and 'add' can not be applied to the root element of the JSON value. -json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored as without changing it to NaN or INF. - -@since version 3.0.0 -*/ -class out_of_range : public exception -{ - public: - out_of_range(int id_, const std::string& what_arg_) - : exception(id_, "out_of_range", what_arg_) - {} -}; - -/*! -@brief exception indicating other errors - -Exceptions have ids 5xx. - -name / id | example massage | description ------------------------------- | --------------- | ------------------------- -json.exception.other_error.501 | unsuccessful: {"op":"test","path":"/baz", "value":"bar"} | A JSON Patch operation 'test' failed. The unsuccessful operation is also printed. - -@since version 3.0.0 -*/ -class other_error : public exception -{ - public: - other_error(int id_, const std::string& what_arg_) - : exception(id_, "other_error", what_arg_) - {} -}; - - - -/////////////////////////// -// JSON type enumeration // -/////////////////////////// - -/*! -@brief the JSON type enumeration - -This enumeration collects the different JSON types. 
It is internally used to -distinguish the stored values, and the functions @ref basic_json::is_null(), -@ref basic_json::is_object(), @ref basic_json::is_array(), -@ref basic_json::is_string(), @ref basic_json::is_boolean(), -@ref basic_json::is_number() (with @ref basic_json::is_number_integer(), -@ref basic_json::is_number_unsigned(), and @ref basic_json::is_number_float()), -@ref basic_json::is_discarded(), @ref basic_json::is_primitive(), and -@ref basic_json::is_structured() rely on it. - -@note There are three enumeration entries (number_integer, number_unsigned, and -number_float), because the library distinguishes these three types for numbers: -@ref basic_json::number_unsigned_t is used for unsigned integers, -@ref basic_json::number_integer_t is used for signed integers, and -@ref basic_json::number_float_t is used for floating-point numbers or to -approximate integers which do not fit in the limits of their respective type. - -@sa @ref basic_json::basic_json(const value_t value_type) -- create a JSON -value with the default value for a given type - -@since version 1.0.0 -*/ -enum class value_t : uint8_t -{ - null, ///< null value - object, ///< object (unordered set of name/value pairs) - array, ///< array (ordered collection of values) - string, ///< string value - boolean, ///< boolean value - number_integer, ///< number value (signed integer) - number_unsigned, ///< number value (unsigned integer) - number_float, ///< number value (floating-point) - discarded ///< discarded by the the parser callback function -}; - -/*! 
-@brief comparison operator for JSON types - -Returns an ordering that is similar to Python: -- order: null < boolean < number < object < array < string -- furthermore, each type is not smaller than itself - -@since version 1.0.0 -*/ -inline bool operator<(const value_t lhs, const value_t rhs) noexcept -{ - static constexpr std::array order = {{ - 0, // null - 3, // object - 4, // array - 5, // string - 1, // boolean - 2, // integer - 2, // unsigned - 2, // float - } - }; - - // discarded values are not comparable - if (lhs == value_t::discarded or rhs == value_t::discarded) - { - return false; - } - - return order[static_cast(lhs)] < - order[static_cast(rhs)]; -} - - -///////////// -// helpers // -///////////// - -// alias templates to reduce boilerplate -template -using enable_if_t = typename std::enable_if::type; - -template -using uncvref_t = typename std::remove_cv::type>::type; - -// taken from http://stackoverflow.com/a/26936864/266378 -template -using is_unscoped_enum = - std::integral_constant::value and - std::is_enum::value>; - -/* -Implementation of two C++17 constructs: conjunction, negation. This is needed -to avoid evaluating all the traits in a condition - -For example: not std::is_same::value and has_value_type::value -will not compile when T = void (on MSVC at least). Whereas -conjunction>, has_value_type>::value will -stop evaluating if negation<...>::value == false - -Please note that those constructs must be used with caution, since symbols can -become very long quickly (which can slow down compilation and cause MSVC -internal compiler errors). Only use it when you have to (see example ahead). 
-*/ -template struct conjunction : std::true_type {}; -template struct conjunction : B1 {}; -template -struct conjunction : std::conditional, B1>::type {}; - -template struct negation : std::integral_constant < bool, !B::value > {}; - -// dispatch utility (taken from ranges-v3) -template struct priority_tag : priority_tag < N - 1 > {}; -template<> struct priority_tag<0> {}; - - -////////////////// -// constructors // -////////////////// - -template struct external_constructor; - -template<> -struct external_constructor -{ - template - static void construct(BasicJsonType& j, typename BasicJsonType::boolean_t b) noexcept - { - j.m_type = value_t::boolean; - j.m_value = b; - j.assert_invariant(); - } -}; - -template<> -struct external_constructor -{ - template - static void construct(BasicJsonType& j, const typename BasicJsonType::string_t& s) - { - j.m_type = value_t::string; - j.m_value = s; - j.assert_invariant(); - } -}; - -template<> -struct external_constructor -{ - template - static void construct(BasicJsonType& j, typename BasicJsonType::number_float_t val) noexcept - { - j.m_type = value_t::number_float; - j.m_value = val; - j.assert_invariant(); - } -}; - -template<> -struct external_constructor -{ - template - static void construct(BasicJsonType& j, typename BasicJsonType::number_unsigned_t val) noexcept - { - j.m_type = value_t::number_unsigned; - j.m_value = val; - j.assert_invariant(); - } -}; - -template<> -struct external_constructor -{ - template - static void construct(BasicJsonType& j, typename BasicJsonType::number_integer_t val) noexcept - { - j.m_type = value_t::number_integer; - j.m_value = val; - j.assert_invariant(); - } -}; - -template<> -struct external_constructor -{ - template - static void construct(BasicJsonType& j, const typename BasicJsonType::array_t& arr) - { - j.m_type = value_t::array; - j.m_value = arr; - j.assert_invariant(); - } - - template::value, - int> = 0> - static void construct(BasicJsonType& j, const CompatibleArrayType& 
arr) - { - using std::begin; - using std::end; - j.m_type = value_t::array; - j.m_value.array = j.template create(begin(arr), end(arr)); - j.assert_invariant(); - } - - template - static void construct(BasicJsonType& j, const std::vector& arr) - { - j.m_type = value_t::array; - j.m_value = value_t::array; - j.m_value.array->reserve(arr.size()); - for (bool x : arr) - { - j.m_value.array->push_back(x); - } - j.assert_invariant(); - } -}; - -template<> -struct external_constructor -{ - template - static void construct(BasicJsonType& j, const typename BasicJsonType::object_t& obj) - { - j.m_type = value_t::object; - j.m_value = obj; - j.assert_invariant(); - } - - template::value, - int> = 0> - static void construct(BasicJsonType& j, const CompatibleObjectType& obj) - { - using std::begin; - using std::end; - - j.m_type = value_t::object; - j.m_value.object = j.template create(begin(obj), end(obj)); - j.assert_invariant(); - } -}; - - -//////////////////////// -// has_/is_ functions // -//////////////////////// - -/*! -@brief Helper to determine whether there's a key_type for T. - -This helper is used to tell associative containers apart from other containers -such as sequence containers. For instance, `std::map` passes the test as it -contains a `mapped_type`, whereas `std::vector` fails the test. 
- -@sa http://stackoverflow.com/a/7728728/266378 -@since version 1.0.0, overworked in version 2.0.6 -*/ -#define NLOHMANN_JSON_HAS_HELPER(type) \ - template struct has_##type { \ - private: \ - template \ - static int detect(U &&); \ - static void detect(...); \ - public: \ - static constexpr bool value = \ - std::is_integral()))>::value; \ - } - -NLOHMANN_JSON_HAS_HELPER(mapped_type); -NLOHMANN_JSON_HAS_HELPER(key_type); -NLOHMANN_JSON_HAS_HELPER(value_type); -NLOHMANN_JSON_HAS_HELPER(iterator); - -#undef NLOHMANN_JSON_HAS_HELPER - - -template -struct is_compatible_object_type_impl : std::false_type {}; - -template -struct is_compatible_object_type_impl -{ - static constexpr auto value = - std::is_constructible::value and - std::is_constructible::value; -}; - -template -struct is_compatible_object_type -{ - static auto constexpr value = is_compatible_object_type_impl < - conjunction>, - has_mapped_type, - has_key_type>::value, - typename BasicJsonType::object_t, CompatibleObjectType >::value; -}; - -template -struct is_basic_json_nested_type -{ - static auto constexpr value = std::is_same::value or - std::is_same::value or - std::is_same::value or - std::is_same::value or - std::is_same::value; -}; - -template -struct is_compatible_array_type -{ - static auto constexpr value = - conjunction>, - negation>, - negation>, - negation>, - has_value_type, - has_iterator>::value; -}; - -template -struct is_compatible_integer_type_impl : std::false_type {}; - -template -struct is_compatible_integer_type_impl -{ - // is there an assert somewhere on overflows? 
- using RealLimits = std::numeric_limits; - using CompatibleLimits = std::numeric_limits; - - static constexpr auto value = - std::is_constructible::value and - CompatibleLimits::is_integer and - RealLimits::is_signed == CompatibleLimits::is_signed; -}; - -template -struct is_compatible_integer_type -{ - static constexpr auto value = - is_compatible_integer_type_impl < - std::is_integral::value and - not std::is_same::value, - RealIntegerType, CompatibleNumberIntegerType > ::value; -}; - - -// trait checking if JSONSerializer::from_json(json const&, udt&) exists -template -struct has_from_json -{ - private: - // also check the return type of from_json - template::from_json( - std::declval(), std::declval()))>::value>> - static int detect(U&&); - static void detect(...); - - public: - static constexpr bool value = std::is_integral>()))>::value; -}; - -// This trait checks if JSONSerializer::from_json(json const&) exists -// this overload is used for non-default-constructible user-defined-types -template -struct has_non_default_from_json -{ - private: - template < - typename U, - typename = enable_if_t::from_json(std::declval()))>::value >> - static int detect(U&&); - static void detect(...); - - public: - static constexpr bool value = std::is_integral>()))>::value; -}; - -// This trait checks if BasicJsonType::json_serializer::to_json exists -template -struct has_to_json -{ - private: - template::to_json( - std::declval(), std::declval()))> - static int detect(U&&); - static void detect(...); - - public: - static constexpr bool value = std::is_integral>()))>::value; -}; - - -///////////// -// to_json // -///////////// - -template::value, int> = 0> -void to_json(BasicJsonType& j, T b) noexcept -{ - external_constructor::construct(j, b); -} - -template::value, int> = 0> -void to_json(BasicJsonType& j, const CompatibleString& s) -{ - external_constructor::construct(j, s); -} - -template::value, int> = 0> -void to_json(BasicJsonType& j, FloatType val) noexcept -{ - 
external_constructor::construct(j, static_cast(val)); -} - -template < - typename BasicJsonType, typename CompatibleNumberUnsignedType, - enable_if_t::value, int> = 0 > -void to_json(BasicJsonType& j, CompatibleNumberUnsignedType val) noexcept -{ - external_constructor::construct(j, static_cast(val)); -} - -template < - typename BasicJsonType, typename CompatibleNumberIntegerType, - enable_if_t::value, int> = 0 > -void to_json(BasicJsonType& j, CompatibleNumberIntegerType val) noexcept -{ - external_constructor::construct(j, static_cast(val)); -} - -template::value, int> = 0> -void to_json(BasicJsonType& j, UnscopedEnumType e) noexcept -{ - external_constructor::construct(j, e); -} - -template -void to_json(BasicJsonType& j, const std::vector& e) -{ - external_constructor::construct(j, e); -} - -template < - typename BasicJsonType, typename CompatibleArrayType, - enable_if_t < - is_compatible_array_type::value or - std::is_same::value, - int > = 0 > -void to_json(BasicJsonType& j, const CompatibleArrayType& arr) -{ - external_constructor::construct(j, arr); -} - -template < - typename BasicJsonType, typename CompatibleObjectType, - enable_if_t::value, - int> = 0 > -void to_json(BasicJsonType& j, const CompatibleObjectType& arr) -{ - external_constructor::construct(j, arr); -} - -template ::value, - int> = 0> -void to_json(BasicJsonType& j, T (&arr)[N]) -{ - external_constructor::construct(j, arr); -} - -/////////////// -// from_json // -/////////////// - -// overloads for basic_json template parameters -template::value and - not std::is_same::value, - int> = 0> -void get_arithmetic_value(const BasicJsonType& j, ArithmeticType& val) -{ - switch (static_cast(j)) - { - case value_t::number_unsigned: - { - val = static_cast( - *j.template get_ptr()); - break; - } - case value_t::number_integer: - { - val = static_cast( - *j.template get_ptr()); - break; - } - case value_t::number_float: - { - val = static_cast( - *j.template get_ptr()); - break; - } - default: - { - 
JSON_THROW(type_error(302, "type must be number, but is " + j.type_name())); - } - } -} - -template -void from_json(const BasicJsonType& j, typename BasicJsonType::boolean_t& b) -{ - if (not j.is_boolean()) - { - JSON_THROW(type_error(302, "type must be boolean, but is " + j.type_name())); - } - b = *j.template get_ptr(); -} - -template -void from_json(const BasicJsonType& j, typename BasicJsonType::string_t& s) -{ - if (not j.is_string()) - { - JSON_THROW(type_error(302, "type must be string, but is " + j.type_name())); - } - s = *j.template get_ptr(); -} - -template -void from_json(const BasicJsonType& j, typename BasicJsonType::number_float_t& val) -{ - get_arithmetic_value(j, val); -} - -template -void from_json(const BasicJsonType& j, typename BasicJsonType::number_unsigned_t& val) -{ - get_arithmetic_value(j, val); -} - -template -void from_json(const BasicJsonType& j, typename BasicJsonType::number_integer_t& val) -{ - get_arithmetic_value(j, val); -} - -template::value, int> = 0> -void from_json(const BasicJsonType& j, UnscopedEnumType& e) -{ - typename std::underlying_type::type val; - get_arithmetic_value(j, val); - e = static_cast(val); -} - -template -void from_json(const BasicJsonType& j, typename BasicJsonType::array_t& arr) -{ - if (not j.is_array()) - { - JSON_THROW(type_error(302, "type must be array, but is " + j.type_name())); - } - arr = *j.template get_ptr(); -} - -// forward_list doesn't have an insert method -template::value, int> = 0> -void from_json(const BasicJsonType& j, std::forward_list& l) -{ - if (not j.is_array()) - { - JSON_THROW(type_error(302, "type must be array, but is " + j.type_name())); - } - - for (auto it = j.rbegin(), end = j.rend(); it != end; ++it) - { - l.push_front(it->template get()); - } -} - -template -void from_json_array_impl(const BasicJsonType& j, CompatibleArrayType& arr, priority_tag<0>) -{ - using std::begin; - using std::end; - - std::transform(j.begin(), j.end(), - std::inserter(arr, end(arr)), [](const 
BasicJsonType & i) - { - // get() returns *this, this won't call a from_json - // method when value_type is BasicJsonType - return i.template get(); - }); -} - -template -auto from_json_array_impl(const BasicJsonType& j, CompatibleArrayType& arr, priority_tag<1>) --> decltype( - arr.reserve(std::declval()), - void()) -{ - using std::begin; - using std::end; - - arr.reserve(j.size()); - std::transform(j.begin(), j.end(), - std::inserter(arr, end(arr)), [](const BasicJsonType & i) - { - // get() returns *this, this won't call a from_json - // method when value_type is BasicJsonType - return i.template get(); - }); -} - -template::value and - std::is_convertible::value and - not std::is_same::value, int> = 0> -void from_json(const BasicJsonType& j, CompatibleArrayType& arr) -{ - if (not j.is_array()) - { - JSON_THROW(type_error(302, "type must be array, but is " + j.type_name())); - } - - from_json_array_impl(j, arr, priority_tag<1> {}); -} - -template::value, int> = 0> -void from_json(const BasicJsonType& j, CompatibleObjectType& obj) -{ - if (not j.is_object()) - { - JSON_THROW(type_error(302, "type must be object, but is " + j.type_name())); - } - - auto inner_object = j.template get_ptr(); - using std::begin; - using std::end; - // we could avoid the assignment, but this might require a for loop, which - // might be less efficient than the container constructor for some - // containers (would it?) - obj = CompatibleObjectType(begin(*inner_object), end(*inner_object)); -} - -// overload for arithmetic types, not chosen for basic_json template arguments -// (BooleanType, etc..); note: Is it really necessary to provide explicit -// overloads for boolean_t etc. in case of a custom BooleanType which is not -// an arithmetic type? 
-template::value and - not std::is_same::value and - not std::is_same::value and - not std::is_same::value and - not std::is_same::value, - int> = 0> -void from_json(const BasicJsonType& j, ArithmeticType& val) -{ - switch (static_cast(j)) - { - case value_t::number_unsigned: - { - val = static_cast(*j.template get_ptr()); - break; - } - case value_t::number_integer: - { - val = static_cast(*j.template get_ptr()); - break; - } - case value_t::number_float: - { - val = static_cast(*j.template get_ptr()); - break; - } - case value_t::boolean: - { - val = static_cast(*j.template get_ptr()); - break; - } - default: - { - JSON_THROW(type_error(302, "type must be number, but is " + j.type_name())); - } - } -} - -struct to_json_fn -{ - private: - template - auto call(BasicJsonType& j, T&& val, priority_tag<1>) const noexcept(noexcept(to_json(j, std::forward(val)))) - -> decltype(to_json(j, std::forward(val)), void()) - { - return to_json(j, std::forward(val)); - } - - template - void call(BasicJsonType&, T&&, priority_tag<0>) const noexcept - { - static_assert(sizeof(BasicJsonType) == 0, - "could not find to_json() method in T's namespace"); - } - - public: - template - void operator()(BasicJsonType& j, T&& val) const - noexcept(noexcept(std::declval().call(j, std::forward(val), priority_tag<1> {}))) - { - return call(j, std::forward(val), priority_tag<1> {}); - } -}; - -struct from_json_fn -{ - private: - template - auto call(const BasicJsonType& j, T& val, priority_tag<1>) const - noexcept(noexcept(from_json(j, val))) - -> decltype(from_json(j, val), void()) - { - return from_json(j, val); - } - - template - void call(const BasicJsonType&, T&, priority_tag<0>) const noexcept - { - static_assert(sizeof(BasicJsonType) == 0, - "could not find from_json() method in T's namespace"); - } - - public: - template - void operator()(const BasicJsonType& j, T& val) const - noexcept(noexcept(std::declval().call(j, val, priority_tag<1> {}))) - { - return call(j, val, priority_tag<1> 
{}); - } -}; - -// taken from ranges-v3 -template -struct static_const -{ - static constexpr T value{}; -}; - -template -constexpr T static_const::value; -} // namespace detail - - -/// namespace to hold default `to_json` / `from_json` functions -namespace -{ -constexpr const auto& to_json = detail::static_const::value; -constexpr const auto& from_json = detail::static_const::value; -} - - -/*! -@brief default JSONSerializer template argument - -This serializer ignores the template arguments and uses ADL -([argument-dependent lookup](http://en.cppreference.com/w/cpp/language/adl)) -for serialization. -*/ -template -struct adl_serializer -{ - /*! - @brief convert a JSON value to any value type - - This function is usually called by the `get()` function of the - @ref basic_json class (either explicit or via conversion operators). - - @param[in] j JSON value to read from - @param[in,out] val value to write to - */ - template - static void from_json(BasicJsonType&& j, ValueType& val) noexcept( - noexcept(::nlohmann::from_json(std::forward(j), val))) - { - ::nlohmann::from_json(std::forward(j), val); - } - - /*! - @brief convert any value type to a JSON value - - This function is usually called by the constructors of the @ref basic_json - class. - - @param[in,out] j JSON value to write to - @param[in] val value to read from - */ - template - static void to_json(BasicJsonType& j, ValueType&& val) noexcept( - noexcept(::nlohmann::to_json(j, std::forward(val)))) - { - ::nlohmann::to_json(j, std::forward(val)); - } -}; - - -/*! 
-@brief a class to store JSON values - -@tparam ObjectType type for JSON objects (`std::map` by default; will be used -in @ref object_t) -@tparam ArrayType type for JSON arrays (`std::vector` by default; will be used -in @ref array_t) -@tparam StringType type for JSON strings and object keys (`std::string` by -default; will be used in @ref string_t) -@tparam BooleanType type for JSON booleans (`bool` by default; will be used -in @ref boolean_t) -@tparam NumberIntegerType type for JSON integer numbers (`int64_t` by -default; will be used in @ref number_integer_t) -@tparam NumberUnsignedType type for JSON unsigned integer numbers (@c -`uint64_t` by default; will be used in @ref number_unsigned_t) -@tparam NumberFloatType type for JSON floating-point numbers (`double` by -default; will be used in @ref number_float_t) -@tparam AllocatorType type of the allocator to use (`std::allocator` by -default) -@tparam JSONSerializer the serializer to resolve internal calls to `to_json()` -and `from_json()` (@ref adl_serializer by default) - -@requirement The class satisfies the following concept requirements: -- Basic - - [DefaultConstructible](http://en.cppreference.com/w/cpp/concept/DefaultConstructible): - JSON values can be default constructed. The result will be a JSON null - value. - - [MoveConstructible](http://en.cppreference.com/w/cpp/concept/MoveConstructible): - A JSON value can be constructed from an rvalue argument. - - [CopyConstructible](http://en.cppreference.com/w/cpp/concept/CopyConstructible): - A JSON value can be copy-constructed from an lvalue expression. - - [MoveAssignable](http://en.cppreference.com/w/cpp/concept/MoveAssignable): - A JSON value van be assigned from an rvalue argument. - - [CopyAssignable](http://en.cppreference.com/w/cpp/concept/CopyAssignable): - A JSON value can be copy-assigned from an lvalue expression. - - [Destructible](http://en.cppreference.com/w/cpp/concept/Destructible): - JSON values can be destructed. 
-- Layout - - [StandardLayoutType](http://en.cppreference.com/w/cpp/concept/StandardLayoutType): - JSON values have - [standard layout](http://en.cppreference.com/w/cpp/language/data_members#Standard_layout): - All non-static data members are private and standard layout types, the - class has no virtual functions or (virtual) base classes. -- Library-wide - - [EqualityComparable](http://en.cppreference.com/w/cpp/concept/EqualityComparable): - JSON values can be compared with `==`, see @ref - operator==(const_reference,const_reference). - - [LessThanComparable](http://en.cppreference.com/w/cpp/concept/LessThanComparable): - JSON values can be compared with `<`, see @ref - operator<(const_reference,const_reference). - - [Swappable](http://en.cppreference.com/w/cpp/concept/Swappable): - Any JSON lvalue or rvalue of can be swapped with any lvalue or rvalue of - other compatible types, using unqualified function call @ref swap(). - - [NullablePointer](http://en.cppreference.com/w/cpp/concept/NullablePointer): - JSON values can be compared against `std::nullptr_t` objects which are used - to model the `null` value. -- Container - - [Container](http://en.cppreference.com/w/cpp/concept/Container): - JSON values can be used like STL containers and provide iterator access. - - [ReversibleContainer](http://en.cppreference.com/w/cpp/concept/ReversibleContainer); - JSON values can be used like STL containers and provide reverse iterator - access. - -@invariant The member variables @a m_value and @a m_type have the following -relationship: -- If `m_type == value_t::object`, then `m_value.object != nullptr`. -- If `m_type == value_t::array`, then `m_value.array != nullptr`. -- If `m_type == value_t::string`, then `m_value.string != nullptr`. -The invariants are checked by member function assert_invariant(). 
- -@internal -@note ObjectType trick from http://stackoverflow.com/a/9860911 -@endinternal - -@see [RFC 7159: The JavaScript Object Notation (JSON) Data Interchange -Format](http://rfc7159.net/rfc7159) - -@since version 1.0.0 - -@nosubgrouping -*/ -template < - template class ObjectType = std::map, - template class ArrayType = std::vector, - class StringType = std::string, - class BooleanType = bool, - class NumberIntegerType = std::int64_t, - class NumberUnsignedType = std::uint64_t, - class NumberFloatType = double, - template class AllocatorType = std::allocator, - template class JSONSerializer = adl_serializer - > -class basic_json -{ - private: - template friend struct detail::external_constructor; - /// workaround type for MSVC - using basic_json_t = basic_json; - - public: - using value_t = detail::value_t; - // forward declarations - template class iter_impl; - template class json_reverse_iterator; - class json_pointer; - template - using json_serializer = JSONSerializer; - - - //////////////// - // exceptions // - //////////////// - - /// @name exceptions - /// Classes to implement user-defined exceptions. - /// @{ - - /// @copydoc detail::exception - using exception = detail::exception; - /// @copydoc detail::parse_error - using parse_error = detail::parse_error; - /// @copydoc detail::invalid_iterator - using invalid_iterator = detail::invalid_iterator; - /// @copydoc detail::type_error - using type_error = detail::type_error; - /// @copydoc detail::out_of_range - using out_of_range = detail::out_of_range; - /// @copydoc detail::other_error - using other_error = detail::other_error; - - /// @} - - - ///////////////////// - // container types // - ///////////////////// - - /// @name container types - /// The canonic container types to use @ref basic_json like any other STL - /// container. 
- /// @{ - - /// the type of elements in a basic_json container - using value_type = basic_json; - - /// the type of an element reference - using reference = value_type&; - /// the type of an element const reference - using const_reference = const value_type&; - - /// a type to represent differences between iterators - using difference_type = std::ptrdiff_t; - /// a type to represent container sizes - using size_type = std::size_t; - - /// the allocator type - using allocator_type = AllocatorType; - - /// the type of an element pointer - using pointer = typename std::allocator_traits::pointer; - /// the type of an element const pointer - using const_pointer = typename std::allocator_traits::const_pointer; - - /// an iterator for a basic_json container - using iterator = iter_impl; - /// a const iterator for a basic_json container - using const_iterator = iter_impl; - /// a reverse iterator for a basic_json container - using reverse_iterator = json_reverse_iterator; - /// a const reverse iterator for a basic_json container - using const_reverse_iterator = json_reverse_iterator; - - /// @} - - - /*! - @brief returns the allocator associated with the container - */ - static allocator_type get_allocator() - { - return allocator_type(); - } - - /*! - @brief returns version information on the library - - This function returns a JSON object with information about the library, - including the version number and information on the platform and compiler. - - @return JSON object holding version information - key | description - ----------- | --------------- - `compiler` | Information on the used compiler. It is an object with the following keys: `c++` (the used C++ standard), `family` (the compiler family; possible values are `clang`, `icc`, `gcc`, `ilecpp`, `msvc`, `pgcpp`, `sunpro`, and `unknown`), and `version` (the compiler version). - `copyright` | The copyright line for the library as string. - `name` | The name of the library as string. 
- `platform` | The used platform as string. Possible values are `win32`, `linux`, `apple`, `unix`, and `unknown`. - `url` | The URL of the project as string. - `version` | The version of the library. It is an object with the following keys: `major`, `minor`, and `patch` as defined by [Semantic Versioning](http://semver.org), and `string` (the version string). - - @liveexample{The following code shows an example output of the `meta()` - function.,meta} - - @complexity Constant. - - @since 2.1.0 - */ - static basic_json meta() - { - basic_json result; - - result["copyright"] = "(C) 2013-2017 Niels Lohmann"; - result["name"] = "JSON for Modern C++"; - result["url"] = "https://github.com/nlohmann/json"; - result["version"] = - { - {"string", "2.1.1"}, {"major", 2}, {"minor", 1}, {"patch", 1} - }; - -#ifdef _WIN32 - result["platform"] = "win32"; -#elif defined __linux__ - result["platform"] = "linux"; -#elif defined __APPLE__ - result["platform"] = "apple"; -#elif defined __unix__ - result["platform"] = "unix"; -#else - result["platform"] = "unknown"; -#endif - -#if defined(__clang__) - result["compiler"] = {{"family", "clang"}, {"version", __clang_version__}}; -#elif defined(__ICC) || defined(__INTEL_COMPILER) - result["compiler"] = {{"family", "icc"}, {"version", __INTEL_COMPILER}}; -#elif defined(__GNUC__) || defined(__GNUG__) - result["compiler"] = {{"family", "gcc"}, {"version", std::to_string(__GNUC__) + "." + std::to_string(__GNUC_MINOR__) + "." 
+ std::to_string(__GNUC_PATCHLEVEL__)}}; -#elif defined(__HP_cc) || defined(__HP_aCC) - result["compiler"] = "hp" -#elif defined(__IBMCPP__) - result["compiler"] = {{"family", "ilecpp"}, {"version", __IBMCPP__}}; -#elif defined(_MSC_VER) - result["compiler"] = {{"family", "msvc"}, {"version", _MSC_VER}}; -#elif defined(__PGI) - result["compiler"] = {{"family", "pgcpp"}, {"version", __PGI}}; -#elif defined(__SUNPRO_CC) - result["compiler"] = {{"family", "sunpro"}, {"version", __SUNPRO_CC}}; -#else - result["compiler"] = {{"family", "unknown"}, {"version", "unknown"}}; -#endif - -#ifdef __cplusplus - result["compiler"]["c++"] = std::to_string(__cplusplus); -#else - result["compiler"]["c++"] = "unknown"; -#endif - return result; - } - - - /////////////////////////// - // JSON value data types // - /////////////////////////// - - /// @name JSON value data types - /// The data types to store a JSON value. These types are derived from - /// the template arguments passed to class @ref basic_json. - /// @{ - - /*! - @brief a type for an object - - [RFC 7159](http://rfc7159.net/rfc7159) describes JSON objects as follows: - > An object is an unordered collection of zero or more name/value pairs, - > where a name is a string and a value is a string, number, boolean, null, - > object, or array. - - To store objects in C++, a type is defined by the template parameters - described below. - - @tparam ObjectType the container to store objects (e.g., `std::map` or - `std::unordered_map`) - @tparam StringType the type of the keys or names (e.g., `std::string`). - The comparison function `std::less` is used to order elements - inside the container. 
- @tparam AllocatorType the allocator to use for objects (e.g., - `std::allocator`) - - #### Default type - - With the default values for @a ObjectType (`std::map`), @a StringType - (`std::string`), and @a AllocatorType (`std::allocator`), the default - value for @a object_t is: - - @code {.cpp} - std::map< - std::string, // key_type - basic_json, // value_type - std::less, // key_compare - std::allocator> // allocator_type - > - @endcode - - #### Behavior - - The choice of @a object_t influences the behavior of the JSON class. With - the default type, objects have the following behavior: - - - When all names are unique, objects will be interoperable in the sense - that all software implementations receiving that object will agree on - the name-value mappings. - - When the names within an object are not unique, later stored name/value - pairs overwrite previously stored name/value pairs, leaving the used - names unique. For instance, `{"key": 1}` and `{"key": 2, "key": 1}` will - be treated as equal and both stored as `{"key": 1}`. - - Internally, name/value pairs are stored in lexicographical order of the - names. Objects will also be serialized (see @ref dump) in this order. - For instance, `{"b": 1, "a": 2}` and `{"a": 2, "b": 1}` will be stored - and serialized as `{"a": 2, "b": 1}`. - - When comparing objects, the order of the name/value pairs is irrelevant. - This makes objects interoperable in the sense that they will not be - affected by these differences. For instance, `{"b": 1, "a": 2}` and - `{"a": 2, "b": 1}` will be treated as equal. - - #### Limits - - [RFC 7159](http://rfc7159.net/rfc7159) specifies: - > An implementation may set limits on the maximum depth of nesting. - - In this class, the object's limit of nesting is not constraint explicitly. - However, a maximum depth of nesting may be introduced by the compiler or - runtime environment. A theoretical limit can be queried by calling the - @ref max_size function of a JSON object. 
- - #### Storage - - Objects are stored as pointers in a @ref basic_json type. That is, for any - access to object values, a pointer of type `object_t*` must be - dereferenced. - - @sa @ref array_t -- type for an array value - - @since version 1.0.0 - - @note The order name/value pairs are added to the object is *not* - preserved by the library. Therefore, iterating an object may return - name/value pairs in a different order than they were originally stored. In - fact, keys will be traversed in alphabetical order as `std::map` with - `std::less` is used by default. Please note this behavior conforms to [RFC - 7159](http://rfc7159.net/rfc7159), because any order implements the - specified "unordered" nature of JSON objects. - */ - using object_t = ObjectType, - AllocatorType>>; - - /*! - @brief a type for an array - - [RFC 7159](http://rfc7159.net/rfc7159) describes JSON arrays as follows: - > An array is an ordered sequence of zero or more values. - - To store objects in C++, a type is defined by the template parameters - explained below. - - @tparam ArrayType container type to store arrays (e.g., `std::vector` or - `std::list`) - @tparam AllocatorType allocator to use for arrays (e.g., `std::allocator`) - - #### Default type - - With the default values for @a ArrayType (`std::vector`) and @a - AllocatorType (`std::allocator`), the default value for @a array_t is: - - @code {.cpp} - std::vector< - basic_json, // value_type - std::allocator // allocator_type - > - @endcode - - #### Limits - - [RFC 7159](http://rfc7159.net/rfc7159) specifies: - > An implementation may set limits on the maximum depth of nesting. - - In this class, the array's limit of nesting is not constraint explicitly. - However, a maximum depth of nesting may be introduced by the compiler or - runtime environment. A theoretical limit can be queried by calling the - @ref max_size function of a JSON array. - - #### Storage - - Arrays are stored as pointers in a @ref basic_json type. 
That is, for any - access to array values, a pointer of type `array_t*` must be dereferenced. - - @sa @ref object_t -- type for an object value - - @since version 1.0.0 - */ - using array_t = ArrayType>; - - /*! - @brief a type for a string - - [RFC 7159](http://rfc7159.net/rfc7159) describes JSON strings as follows: - > A string is a sequence of zero or more Unicode characters. - - To store objects in C++, a type is defined by the template parameter - described below. Unicode values are split by the JSON class into - byte-sized characters during deserialization. - - @tparam StringType the container to store strings (e.g., `std::string`). - Note this container is used for keys/names in objects, see @ref object_t. - - #### Default type - - With the default values for @a StringType (`std::string`), the default - value for @a string_t is: - - @code {.cpp} - std::string - @endcode - - #### Encoding - - Strings are stored in UTF-8 encoding. Therefore, functions like - `std::string::size()` or `std::string::length()` return the number of - bytes in the string rather than the number of characters or glyphs. - - #### String comparison - - [RFC 7159](http://rfc7159.net/rfc7159) states: - > Software implementations are typically required to test names of object - > members for equality. Implementations that transform the textual - > representation into sequences of Unicode code units and then perform the - > comparison numerically, code unit by code unit, are interoperable in the - > sense that implementations will agree in all cases on equality or - > inequality of two strings. For example, implementations that compare - > strings with escaped characters unconverted may incorrectly find that - > `"a\\b"` and `"a\u005Cb"` are not equal. - - This implementation is interoperable as it does compare strings code unit - by code unit. - - #### Storage - - String values are stored as pointers in a @ref basic_json type. 
That is, - for any access to string values, a pointer of type `string_t*` must be - dereferenced. - - @since version 1.0.0 - */ - using string_t = StringType; - - /*! - @brief a type for a boolean - - [RFC 7159](http://rfc7159.net/rfc7159) implicitly describes a boolean as a - type which differentiates the two literals `true` and `false`. - - To store objects in C++, a type is defined by the template parameter @a - BooleanType which chooses the type to use. - - #### Default type - - With the default values for @a BooleanType (`bool`), the default value for - @a boolean_t is: - - @code {.cpp} - bool - @endcode - - #### Storage - - Boolean values are stored directly inside a @ref basic_json type. - - @since version 1.0.0 - */ - using boolean_t = BooleanType; - - /*! - @brief a type for a number (integer) - - [RFC 7159](http://rfc7159.net/rfc7159) describes numbers as follows: - > The representation of numbers is similar to that used in most - > programming languages. A number is represented in base 10 using decimal - > digits. It contains an integer component that may be prefixed with an - > optional minus sign, which may be followed by a fraction part and/or an - > exponent part. Leading zeros are not allowed. (...) Numeric values that - > cannot be represented in the grammar below (such as Infinity and NaN) - > are not permitted. - - This description includes both integer and floating-point numbers. - However, C++ allows more precise storage if it is known whether the number - is a signed integer, an unsigned integer or a floating-point number. - Therefore, three different types, @ref number_integer_t, @ref - number_unsigned_t and @ref number_float_t are used. - - To store integer numbers in C++, a type is defined by the template - parameter @a NumberIntegerType which chooses the type to use. 
- - #### Default type - - With the default values for @a NumberIntegerType (`int64_t`), the default - value for @a number_integer_t is: - - @code {.cpp} - int64_t - @endcode - - #### Default behavior - - - The restrictions about leading zeros is not enforced in C++. Instead, - leading zeros in integer literals lead to an interpretation as octal - number. Internally, the value will be stored as decimal number. For - instance, the C++ integer literal `010` will be serialized to `8`. - During deserialization, leading zeros yield an error. - - Not-a-number (NaN) values will be serialized to `null`. - - #### Limits - - [RFC 7159](http://rfc7159.net/rfc7159) specifies: - > An implementation may set limits on the range and precision of numbers. - - When the default type is used, the maximal integer number that can be - stored is `9223372036854775807` (INT64_MAX) and the minimal integer number - that can be stored is `-9223372036854775808` (INT64_MIN). Integer numbers - that are out of range will yield over/underflow when used in a - constructor. During deserialization, too large or small integer numbers - will be automatically be stored as @ref number_unsigned_t or @ref - number_float_t. - - [RFC 7159](http://rfc7159.net/rfc7159) further states: - > Note that when such software is used, numbers that are integers and are - > in the range \f$[-2^{53}+1, 2^{53}-1]\f$ are interoperable in the sense - > that implementations will agree exactly on their numeric values. - - As this range is a subrange of the exactly supported range [INT64_MIN, - INT64_MAX], this class's integer type is interoperable. - - #### Storage - - Integer number values are stored directly inside a @ref basic_json type. - - @sa @ref number_float_t -- type for number values (floating-point) - - @sa @ref number_unsigned_t -- type for number values (unsigned integer) - - @since version 1.0.0 - */ - using number_integer_t = NumberIntegerType; - - /*! 
- @brief a type for a number (unsigned) - - [RFC 7159](http://rfc7159.net/rfc7159) describes numbers as follows: - > The representation of numbers is similar to that used in most - > programming languages. A number is represented in base 10 using decimal - > digits. It contains an integer component that may be prefixed with an - > optional minus sign, which may be followed by a fraction part and/or an - > exponent part. Leading zeros are not allowed. (...) Numeric values that - > cannot be represented in the grammar below (such as Infinity and NaN) - > are not permitted. - - This description includes both integer and floating-point numbers. - However, C++ allows more precise storage if it is known whether the number - is a signed integer, an unsigned integer or a floating-point number. - Therefore, three different types, @ref number_integer_t, @ref - number_unsigned_t and @ref number_float_t are used. - - To store unsigned integer numbers in C++, a type is defined by the - template parameter @a NumberUnsignedType which chooses the type to use. - - #### Default type - - With the default values for @a NumberUnsignedType (`uint64_t`), the - default value for @a number_unsigned_t is: - - @code {.cpp} - uint64_t - @endcode - - #### Default behavior - - - The restrictions about leading zeros is not enforced in C++. Instead, - leading zeros in integer literals lead to an interpretation as octal - number. Internally, the value will be stored as decimal number. For - instance, the C++ integer literal `010` will be serialized to `8`. - During deserialization, leading zeros yield an error. - - Not-a-number (NaN) values will be serialized to `null`. - - #### Limits - - [RFC 7159](http://rfc7159.net/rfc7159) specifies: - > An implementation may set limits on the range and precision of numbers. - - When the default type is used, the maximal integer number that can be - stored is `18446744073709551615` (UINT64_MAX) and the minimal integer - number that can be stored is `0`. 
Integer numbers that are out of range - will yield over/underflow when used in a constructor. During - deserialization, too large or small integer numbers will be automatically - be stored as @ref number_integer_t or @ref number_float_t. - - [RFC 7159](http://rfc7159.net/rfc7159) further states: - > Note that when such software is used, numbers that are integers and are - > in the range \f$[-2^{53}+1, 2^{53}-1]\f$ are interoperable in the sense - > that implementations will agree exactly on their numeric values. - - As this range is a subrange (when considered in conjunction with the - number_integer_t type) of the exactly supported range [0, UINT64_MAX], - this class's integer type is interoperable. - - #### Storage - - Integer number values are stored directly inside a @ref basic_json type. - - @sa @ref number_float_t -- type for number values (floating-point) - @sa @ref number_integer_t -- type for number values (integer) - - @since version 2.0.0 - */ - using number_unsigned_t = NumberUnsignedType; - - /*! - @brief a type for a number (floating-point) - - [RFC 7159](http://rfc7159.net/rfc7159) describes numbers as follows: - > The representation of numbers is similar to that used in most - > programming languages. A number is represented in base 10 using decimal - > digits. It contains an integer component that may be prefixed with an - > optional minus sign, which may be followed by a fraction part and/or an - > exponent part. Leading zeros are not allowed. (...) Numeric values that - > cannot be represented in the grammar below (such as Infinity and NaN) - > are not permitted. - - This description includes both integer and floating-point numbers. - However, C++ allows more precise storage if it is known whether the number - is a signed integer, an unsigned integer or a floating-point number. - Therefore, three different types, @ref number_integer_t, @ref - number_unsigned_t and @ref number_float_t are used. 
- - To store floating-point numbers in C++, a type is defined by the template - parameter @a NumberFloatType which chooses the type to use. - - #### Default type - - With the default values for @a NumberFloatType (`double`), the default - value for @a number_float_t is: - - @code {.cpp} - double - @endcode - - #### Default behavior - - - The restrictions about leading zeros is not enforced in C++. Instead, - leading zeros in floating-point literals will be ignored. Internally, - the value will be stored as decimal number. For instance, the C++ - floating-point literal `01.2` will be serialized to `1.2`. During - deserialization, leading zeros yield an error. - - Not-a-number (NaN) values will be serialized to `null`. - - #### Limits - - [RFC 7159](http://rfc7159.net/rfc7159) states: - > This specification allows implementations to set limits on the range and - > precision of numbers accepted. Since software that implements IEEE - > 754-2008 binary64 (double precision) numbers is generally available and - > widely used, good interoperability can be achieved by implementations - > that expect no more precision or range than these provide, in the sense - > that implementations will approximate JSON numbers within the expected - > precision. - - This implementation does exactly follow this approach, as it uses double - precision floating-point numbers. Note values smaller than - `-1.79769313486232e+308` and values greater than `1.79769313486232e+308` - will be stored as NaN internally and be serialized to `null`. - - #### Storage - - Floating-point number values are stored directly inside a @ref basic_json - type. - - @sa @ref number_integer_t -- type for number values (integer) - - @sa @ref number_unsigned_t -- type for number values (unsigned integer) - - @since version 1.0.0 - */ - using number_float_t = NumberFloatType; - - /// @} - - private: - - /// helper for exception-safe object creation - template - static T* create(Args&& ... 
args) - { - AllocatorType alloc; - auto deleter = [&](T * object) - { - alloc.deallocate(object, 1); - }; - std::unique_ptr object(alloc.allocate(1), deleter); - alloc.construct(object.get(), std::forward(args)...); - assert(object != nullptr); - return object.release(); - } - - //////////////////////// - // JSON value storage // - //////////////////////// - - /*! - @brief a JSON value - - The actual storage for a JSON value of the @ref basic_json class. This - union combines the different storage types for the JSON value types - defined in @ref value_t. - - JSON type | value_t type | used type - --------- | --------------- | ------------------------ - object | object | pointer to @ref object_t - array | array | pointer to @ref array_t - string | string | pointer to @ref string_t - boolean | boolean | @ref boolean_t - number | number_integer | @ref number_integer_t - number | number_unsigned | @ref number_unsigned_t - number | number_float | @ref number_float_t - null | null | *no value is stored* - - @note Variable-length types (objects, arrays, and strings) are stored as - pointers. The size of the union should not exceed 64 bits if the default - value types are used. 
- - @since version 1.0.0 - */ - union json_value - { - /// object (stored with pointer to save storage) - object_t* object; - /// array (stored with pointer to save storage) - array_t* array; - /// string (stored with pointer to save storage) - string_t* string; - /// boolean - boolean_t boolean; - /// number (integer) - number_integer_t number_integer; - /// number (unsigned integer) - number_unsigned_t number_unsigned; - /// number (floating-point) - number_float_t number_float; - - /// default constructor (for null values) - json_value() = default; - /// constructor for booleans - json_value(boolean_t v) noexcept : boolean(v) {} - /// constructor for numbers (integer) - json_value(number_integer_t v) noexcept : number_integer(v) {} - /// constructor for numbers (unsigned) - json_value(number_unsigned_t v) noexcept : number_unsigned(v) {} - /// constructor for numbers (floating-point) - json_value(number_float_t v) noexcept : number_float(v) {} - /// constructor for empty values of a given type - json_value(value_t t) - { - switch (t) - { - case value_t::object: - { - object = create(); - break; - } - - case value_t::array: - { - array = create(); - break; - } - - case value_t::string: - { - string = create(""); - break; - } - - case value_t::boolean: - { - boolean = boolean_t(false); - break; - } - - case value_t::number_integer: - { - number_integer = number_integer_t(0); - break; - } - - case value_t::number_unsigned: - { - number_unsigned = number_unsigned_t(0); - break; - } - - case value_t::number_float: - { - number_float = number_float_t(0.0); - break; - } - - case value_t::null: - { - break; - } - - default: - { - if (t == value_t::null) - { - JSON_THROW(other_error(500, "961c151d2e87f2686a955a9be24d316f1362bf21 2.1.1")); // LCOV_EXCL_LINE - } - break; - } - } - } - - /// constructor for strings - json_value(const string_t& value) - { - string = create(value); - } - - /// constructor for objects - json_value(const object_t& value) - { - object = 
create(value); - } - - /// constructor for arrays - json_value(const array_t& value) - { - array = create(value); - } - }; - - /*! - @brief checks the class invariants - - This function asserts the class invariants. It needs to be called at the - end of every constructor to make sure that created objects respect the - invariant. Furthermore, it has to be called each time the type of a JSON - value is changed, because the invariant expresses a relationship between - @a m_type and @a m_value. - */ - void assert_invariant() const - { - assert(m_type != value_t::object or m_value.object != nullptr); - assert(m_type != value_t::array or m_value.array != nullptr); - assert(m_type != value_t::string or m_value.string != nullptr); - } - - public: - ////////////////////////// - // JSON parser callback // - ////////////////////////// - - /*! - @brief JSON callback events - - This enumeration lists the parser events that can trigger calling a - callback function of type @ref parser_callback_t during parsing. - - @image html callback_events.png "Example when certain parse events are triggered" - - @since version 1.0.0 - */ - enum class parse_event_t : uint8_t - { - /// the parser read `{` and started to process a JSON object - object_start, - /// the parser read `}` and finished processing a JSON object - object_end, - /// the parser read `[` and started to process a JSON array - array_start, - /// the parser read `]` and finished processing a JSON array - array_end, - /// the parser read a key of a value in an object - key, - /// the parser finished reading a JSON value - value - }; - - /*! - @brief per-element parser callback type - - With a parser callback function, the result of parsing a JSON text can be - influenced. 
When passed to @ref parse(std::istream&, const - parser_callback_t) or @ref parse(const CharT, const parser_callback_t), - it is called on certain events (passed as @ref parse_event_t via parameter - @a event) with a set recursion depth @a depth and context JSON value - @a parsed. The return value of the callback function is a boolean - indicating whether the element that emitted the callback shall be kept or - not. - - We distinguish six scenarios (determined by the event type) in which the - callback function can be called. The following table describes the values - of the parameters @a depth, @a event, and @a parsed. - - parameter @a event | description | parameter @a depth | parameter @a parsed - ------------------ | ----------- | ------------------ | ------------------- - parse_event_t::object_start | the parser read `{` and started to process a JSON object | depth of the parent of the JSON object | a JSON value with type discarded - parse_event_t::key | the parser read a key of a value in an object | depth of the currently parsed JSON object | a JSON string containing the key - parse_event_t::object_end | the parser read `}` and finished processing a JSON object | depth of the parent of the JSON object | the parsed JSON object - parse_event_t::array_start | the parser read `[` and started to process a JSON array | depth of the parent of the JSON array | a JSON value with type discarded - parse_event_t::array_end | the parser read `]` and finished processing a JSON array | depth of the parent of the JSON array | the parsed JSON array - parse_event_t::value | the parser finished reading a JSON value | depth of the value | the parsed JSON value - - @image html callback_events.png "Example when certain parse events are triggered" - - Discarding a value (i.e., returning `false`) has different effects - depending on the context in which function was called: - - - Discarded values in structured types are skipped. 
That is, the parser - will behave as if the discarded value was never read. - - In case a value outside a structured type is skipped, it is replaced - with `null`. This case happens if the top-level element is skipped. - - @param[in] depth the depth of the recursion during parsing - - @param[in] event an event of type parse_event_t indicating the context in - the callback function has been called - - @param[in,out] parsed the current intermediate parse result; note that - writing to this value has no effect for parse_event_t::key events - - @return Whether the JSON value which called the function during parsing - should be kept (`true`) or not (`false`). In the latter case, it is either - skipped completely or replaced by an empty discarded object. - - @sa @ref parse(std::istream&, parser_callback_t) or - @ref parse(const CharT, const parser_callback_t) for examples - - @since version 1.0.0 - */ - using parser_callback_t = std::function; - - - ////////////////// - // constructors // - ////////////////// - - /// @name constructors and destructors - /// Constructors of class @ref basic_json, copy/move constructor, copy - /// assignment, static functions creating objects, and the destructor. - /// @{ - - /*! - @brief create an empty value with a given type - - Create an empty JSON value with a given type. The value will be default - initialized with an empty value which depends on the type: - - Value type | initial value - ----------- | ------------- - null | `null` - boolean | `false` - string | `""` - number | `0` - object | `{}` - array | `[]` - - @param[in] value_type the type of the value to create - - @complexity Constant. - - @liveexample{The following code shows the constructor for different @ref - value_t values,basic_json__value_t} - - @since version 1.0.0 - */ - basic_json(const value_t value_type) - : m_type(value_type), m_value(value_type) - { - assert_invariant(); - } - - /*! - @brief create a null object - - Create a `null` JSON value. 
It either takes a null pointer as parameter - (explicitly creating `null`) or no parameter (implicitly creating `null`). - The passed null pointer itself is not read -- it is only used to choose - the right constructor. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this constructor never throws - exceptions. - - @liveexample{The following code shows the constructor with and without a - null pointer parameter.,basic_json__nullptr_t} - - @since version 1.0.0 - */ - basic_json(std::nullptr_t = nullptr) noexcept - : basic_json(value_t::null) - { - assert_invariant(); - } - - /*! - @brief create a JSON value - - This is a "catch all" constructor for all compatible JSON types; that is, - types for which a `to_json()` method exsits. The constructor forwards the - parameter @a val to that method (to `json_serializer::to_json` method - with `U = uncvref_t`, to be exact). - - Template type @a CompatibleType includes, but is not limited to, the - following types: - - **arrays**: @ref array_t and all kinds of compatible containers such as - `std::vector`, `std::deque`, `std::list`, `std::forward_list`, - `std::array`, `std::set`, `std::unordered_set`, `std::multiset`, and - `unordered_multiset` with a `value_type` from which a @ref basic_json - value can be constructed. - - **objects**: @ref object_t and all kinds of compatible associative - containers such as `std::map`, `std::unordered_map`, `std::multimap`, - and `std::unordered_multimap` with a `key_type` compatible to - @ref string_t and a `value_type` from which a @ref basic_json value can - be constructed. - - **strings**: @ref string_t, string literals, and all compatible string - containers can be used. - - **numbers**: @ref number_integer_t, @ref number_unsigned_t, - @ref number_float_t, and all convertible number types such as `int`, - `size_t`, `int64_t`, `float` or `double` can be used. - - **boolean**: @ref boolean_t / `bool` can be used. - - See the examples below. 
- - @tparam CompatibleType a type such that: - - @a CompatibleType is not derived from `std::istream`, - - @a CompatibleType is not @ref basic_json (to avoid hijacking copy/move - constructors), - - @a CompatibleType is not a @ref basic_json nested type (e.g., - @ref json_pointer, @ref iterator, etc ...) - - @ref @ref json_serializer has a - `to_json(basic_json_t&, CompatibleType&&)` method - - @tparam U = `uncvref_t` - - @param[in] val the value to be forwarded - - @complexity Usually linear in the size of the passed @a val, also - depending on the implementation of the called `to_json()` - method. - - @throw what `json_serializer::to_json()` throws - - @liveexample{The following code shows the constructor with several - compatible types.,basic_json__CompatibleType} - - @since version 2.1.0 - */ - template, - detail::enable_if_t::value and - not std::is_same::value and - not detail::is_basic_json_nested_type< - basic_json_t, U>::value and - detail::has_to_json::value, - int> = 0> - basic_json(CompatibleType && val) noexcept(noexcept(JSONSerializer::to_json( - std::declval(), std::forward(val)))) - { - JSONSerializer::to_json(*this, std::forward(val)); - assert_invariant(); - } - - /*! - @brief create a container (array or object) from an initializer list - - Creates a JSON value of type array or object from the passed initializer - list @a init. In case @a type_deduction is `true` (default), the type of - the JSON value to be created is deducted from the initializer list @a init - according to the following rules: - - 1. If the list is empty, an empty JSON object value `{}` is created. - 2. If the list consists of pairs whose first element is a string, a JSON - object value is created where the first elements of the pairs are - treated as keys and the second elements are as values. - 3. In all other cases, an array is created. - - The rules aim to create the best fit between a C++ initializer list and - JSON values. The rationale is as follows: - - 1. 
The empty initializer list is written as `{}` which is exactly an empty - JSON object. - 2. C++ has now way of describing mapped types other than to list a list of - pairs. As JSON requires that keys must be of type string, rule 2 is the - weakest constraint one can pose on initializer lists to interpret them - as an object. - 3. In all other cases, the initializer list could not be interpreted as - JSON object type, so interpreting it as JSON array type is safe. - - With the rules described above, the following JSON values cannot be - expressed by an initializer list: - - - the empty array (`[]`): use @ref array(std::initializer_list) - with an empty initializer list in this case - - arrays whose elements satisfy rule 2: use @ref - array(std::initializer_list) with the same initializer list - in this case - - @note When used without parentheses around an empty initializer list, @ref - basic_json() is called instead of this function, yielding the JSON null - value. - - @param[in] init initializer list with JSON values - - @param[in] type_deduction internal parameter; when set to `true`, the type - of the JSON value is deducted from the initializer list @a init; when set - to `false`, the type provided via @a manual_type is forced. This mode is - used by the functions @ref array(std::initializer_list) and - @ref object(std::initializer_list). - - @param[in] manual_type internal parameter; when @a type_deduction is set - to `false`, the created JSON value will use the provided type (only @ref - value_t::array and @ref value_t::object are valid); when @a type_deduction - is set to `true`, this parameter has no effect - - @throw type_error.301 if @a type_deduction is `false`, @a manual_type is - `value_t::object`, but @a init contains an element which is not a pair - whose first element is a string. In this case, the constructor could not - create an object. If @a type_deduction would have be `true`, an array - would have been created. 
See @ref object(std::initializer_list) - for an example. - - @complexity Linear in the size of the initializer list @a init. - - @liveexample{The example below shows how JSON values are created from - initializer lists.,basic_json__list_init_t} - - @sa @ref array(std::initializer_list) -- create a JSON array - value from an initializer list - @sa @ref object(std::initializer_list) -- create a JSON object - value from an initializer list - - @since version 1.0.0 - */ - basic_json(std::initializer_list init, - bool type_deduction = true, - value_t manual_type = value_t::array) - { - // check if each element is an array with two elements whose first - // element is a string - bool is_an_object = std::all_of(init.begin(), init.end(), - [](const basic_json & element) - { - return element.is_array() and element.size() == 2 and element[0].is_string(); - }); - - // adjust type if type deduction is not wanted - if (not type_deduction) - { - // if array is wanted, do not create an object though possible - if (manual_type == value_t::array) - { - is_an_object = false; - } - - // if object is wanted but impossible, throw an exception - if (manual_type == value_t::object and not is_an_object) - { - JSON_THROW(type_error(301, "cannot create object from initializer list")); - } - } - - if (is_an_object) - { - // the initializer list is a list of pairs -> create object - m_type = value_t::object; - m_value = value_t::object; - - std::for_each(init.begin(), init.end(), [this](const basic_json & element) - { - m_value.object->emplace(*(element[0].m_value.string), element[1]); - }); - } - else - { - // the initializer list describes an array -> create array - m_type = value_t::array; - m_value.array = create(init); - } - - assert_invariant(); - } - - /*! - @brief explicitly create an array from an initializer list - - Creates a JSON array value from a given initializer list. That is, given a - list of values `a, b, c`, creates the JSON value `[a, b, c]`. 
If the - initializer list is empty, the empty array `[]` is created. - - @note This function is only needed to express two edge cases that cannot - be realized with the initializer list constructor (@ref - basic_json(std::initializer_list, bool, value_t)). These cases - are: - 1. creating an array whose elements are all pairs whose first element is a - string -- in this case, the initializer list constructor would create an - object, taking the first elements as keys - 2. creating an empty array -- passing the empty initializer list to the - initializer list constructor yields an empty object - - @param[in] init initializer list with JSON values to create an array from - (optional) - - @return JSON array value - - @complexity Linear in the size of @a init. - - @liveexample{The following code shows an example for the `array` - function.,array} - - @sa @ref basic_json(std::initializer_list, bool, value_t) -- - create a JSON value from an initializer list - @sa @ref object(std::initializer_list) -- create a JSON object - value from an initializer list - - @since version 1.0.0 - */ - static basic_json array(std::initializer_list init = - std::initializer_list()) - { - return basic_json(init, false, value_t::array); - } - - /*! - @brief explicitly create an object from an initializer list - - Creates a JSON object value from a given initializer list. The initializer - lists elements must be pairs, and their first elements must be strings. If - the initializer list is empty, the empty object `{}` is created. - - @note This function is only added for symmetry reasons. In contrast to the - related function @ref array(std::initializer_list), there are - no cases which can only be expressed by this function. That is, any - initializer list @a init can also be passed to the initializer list - constructor @ref basic_json(std::initializer_list, bool, value_t). 
- - @param[in] init initializer list to create an object from (optional) - - @return JSON object value - - @throw type_error.301 if @a init is not a list of pairs whose first - elements are strings. In this case, no object can be created. When such a - value is passed to @ref basic_json(std::initializer_list, bool, value_t), - an array would have been created from the passed initializer list @a init. - See example below. - - @complexity Linear in the size of @a init. - - @liveexample{The following code shows an example for the `object` - function.,object} - - @sa @ref basic_json(std::initializer_list, bool, value_t) -- - create a JSON value from an initializer list - @sa @ref array(std::initializer_list) -- create a JSON array - value from an initializer list - - @since version 1.0.0 - */ - static basic_json object(std::initializer_list init = - std::initializer_list()) - { - return basic_json(init, false, value_t::object); - } - - /*! - @brief construct an array with count copies of given value - - Constructs a JSON array value by creating @a cnt copies of a passed value. - In case @a cnt is `0`, an empty array is created. As postcondition, - `std::distance(begin(),end()) == cnt` holds. - - @param[in] cnt the number of JSON copies of @a val to create - @param[in] val the JSON value to copy - - @complexity Linear in @a cnt. - - @liveexample{The following code shows examples for the @ref - basic_json(size_type\, const basic_json&) - constructor.,basic_json__size_type_basic_json} - - @since version 1.0.0 - */ - basic_json(size_type cnt, const basic_json& val) - : m_type(value_t::array) - { - m_value.array = create(cnt, val); - assert_invariant(); - } - - /*! - @brief construct a JSON container given an iterator range - - Constructs the JSON value with the contents of the range `[first, last)`. 
- The semantics depends on the different types a JSON value can have: - - In case of primitive types (number, boolean, or string), @a first must - be `begin()` and @a last must be `end()`. In this case, the value is - copied. Otherwise, invalid_iterator.204 is thrown. - - In case of structured types (array, object), the constructor behaves as - similar versions for `std::vector`. - - In case of a null type, invalid_iterator.206 is thrown. - - @tparam InputIT an input iterator type (@ref iterator or @ref - const_iterator) - - @param[in] first begin of the range to copy from (included) - @param[in] last end of the range to copy from (excluded) - - @pre Iterators @a first and @a last must be initialized. **This - precondition is enforced with an assertion.** - - @pre Range `[first, last)` is valid. Usually, this precondition cannot be - checked efficiently. Only certain edge cases are detected; see the - description of the exceptions below. - - @throw invalid_iterator.201 if iterators @a first and @a last are not - compatible (i.e., do not belong to the same JSON value). In this case, - the range `[first, last)` is undefined. - @throw invalid_iterator.204 if iterators @a first and @a last belong to a - primitive type (number, boolean, or string), but @a first does not point - to the first element any more. In this case, the range `[first, last)` is - undefined. See example code below. - @throw invalid_iterator.206 if iterators @a first and @a last belong to a - null value. In this case, the range `[first, last)` is undefined. - - @complexity Linear in distance between @a first and @a last. 
- - @liveexample{The example below shows several ways to create JSON values by - specifying a subrange with iterators.,basic_json__InputIt_InputIt} - - @since version 1.0.0 - */ - template::value or - std::is_same::value, int>::type = 0> - basic_json(InputIT first, InputIT last) - { - assert(first.m_object != nullptr); - assert(last.m_object != nullptr); - - // make sure iterator fits the current value - if (first.m_object != last.m_object) - { - JSON_THROW(invalid_iterator(201, "iterators are not compatible")); - } - - // copy type from first iterator - m_type = first.m_object->m_type; - - // check if iterator range is complete for primitive values - switch (m_type) - { - case value_t::boolean: - case value_t::number_float: - case value_t::number_integer: - case value_t::number_unsigned: - case value_t::string: - { - if (not first.m_it.primitive_iterator.is_begin() or not last.m_it.primitive_iterator.is_end()) - { - JSON_THROW(invalid_iterator(204, "iterators out of range")); - } - break; - } - - default: - { - break; - } - } - - switch (m_type) - { - case value_t::number_integer: - { - m_value.number_integer = first.m_object->m_value.number_integer; - break; - } - - case value_t::number_unsigned: - { - m_value.number_unsigned = first.m_object->m_value.number_unsigned; - break; - } - - case value_t::number_float: - { - m_value.number_float = first.m_object->m_value.number_float; - break; - } - - case value_t::boolean: - { - m_value.boolean = first.m_object->m_value.boolean; - break; - } - - case value_t::string: - { - m_value = *first.m_object->m_value.string; - break; - } - - case value_t::object: - { - m_value.object = create(first.m_it.object_iterator, - last.m_it.object_iterator); - break; - } - - case value_t::array: - { - m_value.array = create(first.m_it.array_iterator, - last.m_it.array_iterator); - break; - } - - default: - { - JSON_THROW(invalid_iterator(206, "cannot construct with iterators from " + - first.m_object->type_name())); - } - } - - 
assert_invariant(); - } - - - /////////////////////////////////////// - // other constructors and destructor // - /////////////////////////////////////// - - /*! - @brief copy constructor - - Creates a copy of a given JSON value. - - @param[in] other the JSON value to copy - - @complexity Linear in the size of @a other. - - @requirement This function helps `basic_json` satisfying the - [Container](http://en.cppreference.com/w/cpp/concept/Container) - requirements: - - The complexity is linear. - - As postcondition, it holds: `other == basic_json(other)`. - - @liveexample{The following code shows an example for the copy - constructor.,basic_json__basic_json} - - @since version 1.0.0 - */ - basic_json(const basic_json& other) - : m_type(other.m_type) - { - // check of passed value is valid - other.assert_invariant(); - - switch (m_type) - { - case value_t::object: - { - m_value = *other.m_value.object; - break; - } - - case value_t::array: - { - m_value = *other.m_value.array; - break; - } - - case value_t::string: - { - m_value = *other.m_value.string; - break; - } - - case value_t::boolean: - { - m_value = other.m_value.boolean; - break; - } - - case value_t::number_integer: - { - m_value = other.m_value.number_integer; - break; - } - - case value_t::number_unsigned: - { - m_value = other.m_value.number_unsigned; - break; - } - - case value_t::number_float: - { - m_value = other.m_value.number_float; - break; - } - - default: - { - break; - } - } - - assert_invariant(); - } - - /*! - @brief move constructor - - Move constructor. Constructs a JSON value with the contents of the given - value @a other using move semantics. It "steals" the resources from @a - other and leaves it as JSON null value. - - @param[in,out] other value to move to this object - - @post @a other is a JSON null value - - @complexity Constant. 
- - @liveexample{The code below shows the move constructor explicitly called - via std::move.,basic_json__moveconstructor} - - @since version 1.0.0 - */ - basic_json(basic_json&& other) noexcept - : m_type(std::move(other.m_type)), - m_value(std::move(other.m_value)) - { - // check that passed value is valid - other.assert_invariant(); - - // invalidate payload - other.m_type = value_t::null; - other.m_value = {}; - - assert_invariant(); - } - - /*! - @brief copy assignment - - Copy assignment operator. Copies a JSON value via the "copy and swap" - strategy: It is expressed in terms of the copy constructor, destructor, - and the swap() member function. - - @param[in] other value to copy from - - @complexity Linear. - - @requirement This function helps `basic_json` satisfying the - [Container](http://en.cppreference.com/w/cpp/concept/Container) - requirements: - - The complexity is linear. - - @liveexample{The code below shows and example for the copy assignment. It - creates a copy of value `a` which is then swapped with `b`. Finally\, the - copy of `a` (which is the null value after the swap) is - destroyed.,basic_json__copyassignment} - - @since version 1.0.0 - */ - reference& operator=(basic_json other) noexcept ( - std::is_nothrow_move_constructible::value and - std::is_nothrow_move_assignable::value and - std::is_nothrow_move_constructible::value and - std::is_nothrow_move_assignable::value - ) - { - // check that passed value is valid - other.assert_invariant(); - - using std::swap; - swap(m_type, other.m_type); - swap(m_value, other.m_value); - - assert_invariant(); - return *this; - } - - /*! - @brief destructor - - Destroys the JSON value and frees all allocated memory. - - @complexity Linear. - - @requirement This function helps `basic_json` satisfying the - [Container](http://en.cppreference.com/w/cpp/concept/Container) - requirements: - - The complexity is linear. - - All stored elements are destroyed and all memory is freed. 
- - @since version 1.0.0 - */ - ~basic_json() - { - assert_invariant(); - - switch (m_type) - { - case value_t::object: - { - AllocatorType alloc; - alloc.destroy(m_value.object); - alloc.deallocate(m_value.object, 1); - break; - } - - case value_t::array: - { - AllocatorType alloc; - alloc.destroy(m_value.array); - alloc.deallocate(m_value.array, 1); - break; - } - - case value_t::string: - { - AllocatorType alloc; - alloc.destroy(m_value.string); - alloc.deallocate(m_value.string, 1); - break; - } - - default: - { - // all other types need no specific destructor - break; - } - } - } - - /// @} - - public: - /////////////////////// - // object inspection // - /////////////////////// - - /// @name object inspection - /// Functions to inspect the type of a JSON value. - /// @{ - - /*! - @brief serialization - - Serialization function for JSON values. The function tries to mimic - Python's `json.dumps()` function, and currently supports its @a indent - parameter. - - @param[in] indent If indent is nonnegative, then array elements and object - members will be pretty-printed with that indent level. An indent level of - `0` will only insert newlines. `-1` (the default) selects the most compact - representation. - - @return string containing the serialization of the JSON value - - @complexity Linear. - - @liveexample{The following example shows the effect of different @a indent - parameters to the result of the serialization.,dump} - - @see https://docs.python.org/2/library/json.html#json.dump - - @since version 1.0.0 - */ - string_t dump(const int indent = -1) const - { - std::stringstream ss; - serializer s(ss); - - if (indent >= 0) - { - s.dump(*this, true, static_cast(indent)); - } - else - { - s.dump(*this, false, 0); - } - - return ss.str(); - } - - /*! - @brief return the type of the JSON value (explicit) - - Return the type of the JSON value as a value from the @ref value_t - enumeration. - - @return the type of the JSON value - - @complexity Constant. 
- - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `type()` for all JSON - types.,type} - - @since version 1.0.0 - */ - constexpr value_t type() const noexcept - { - return m_type; - } - - /*! - @brief return whether type is primitive - - This function returns true iff the JSON type is primitive (string, number, - boolean, or null). - - @return `true` if type is primitive (string, number, boolean, or null), - `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_primitive()` for all JSON - types.,is_primitive} - - @sa @ref is_structured() -- returns whether JSON value is structured - @sa @ref is_null() -- returns whether JSON value is `null` - @sa @ref is_string() -- returns whether JSON value is a string - @sa @ref is_boolean() -- returns whether JSON value is a boolean - @sa @ref is_number() -- returns whether JSON value is a number - - @since version 1.0.0 - */ - constexpr bool is_primitive() const noexcept - { - return is_null() or is_string() or is_boolean() or is_number(); - } - - /*! - @brief return whether type is structured - - This function returns true iff the JSON type is structured (array or - object). - - @return `true` if type is structured (array or object), `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_structured()` for all JSON - types.,is_structured} - - @sa @ref is_primitive() -- returns whether value is primitive - @sa @ref is_array() -- returns whether value is an array - @sa @ref is_object() -- returns whether value is an object - - @since version 1.0.0 - */ - constexpr bool is_structured() const noexcept - { - return is_array() or is_object(); - } - - /*! 
- @brief return whether value is null - - This function returns true iff the JSON value is null. - - @return `true` if type is null, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_null()` for all JSON - types.,is_null} - - @since version 1.0.0 - */ - constexpr bool is_null() const noexcept - { - return m_type == value_t::null; - } - - /*! - @brief return whether value is a boolean - - This function returns true iff the JSON value is a boolean. - - @return `true` if type is boolean, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_boolean()` for all JSON - types.,is_boolean} - - @since version 1.0.0 - */ - constexpr bool is_boolean() const noexcept - { - return m_type == value_t::boolean; - } - - /*! - @brief return whether value is a number - - This function returns true iff the JSON value is a number. This includes - both integer and floating-point values. - - @return `true` if type is number (regardless whether integer, unsigned - integer or floating-type), `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_number()` for all JSON - types.,is_number} - - @sa @ref is_number_integer() -- check if value is an integer or unsigned - integer number - @sa @ref is_number_unsigned() -- check if value is an unsigned integer - number - @sa @ref is_number_float() -- check if value is a floating-point number - - @since version 1.0.0 - */ - constexpr bool is_number() const noexcept - { - return is_number_integer() or is_number_float(); - } - - /*! 
- @brief return whether value is an integer number - - This function returns true iff the JSON value is an integer or unsigned - integer number. This excludes floating-point values. - - @return `true` if type is an integer or unsigned integer number, `false` - otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_number_integer()` for all - JSON types.,is_number_integer} - - @sa @ref is_number() -- check if value is a number - @sa @ref is_number_unsigned() -- check if value is an unsigned integer - number - @sa @ref is_number_float() -- check if value is a floating-point number - - @since version 1.0.0 - */ - constexpr bool is_number_integer() const noexcept - { - return m_type == value_t::number_integer or m_type == value_t::number_unsigned; - } - - /*! - @brief return whether value is an unsigned integer number - - This function returns true iff the JSON value is an unsigned integer - number. This excludes floating-point and (signed) integer values. - - @return `true` if type is an unsigned integer number, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_number_unsigned()` for all - JSON types.,is_number_unsigned} - - @sa @ref is_number() -- check if value is a number - @sa @ref is_number_integer() -- check if value is an integer or unsigned - integer number - @sa @ref is_number_float() -- check if value is a floating-point number - - @since version 2.0.0 - */ - constexpr bool is_number_unsigned() const noexcept - { - return m_type == value_t::number_unsigned; - } - - /*! - @brief return whether value is a floating-point number - - This function returns true iff the JSON value is a floating-point number. - This excludes integer and unsigned integer values. 
- - @return `true` if type is a floating-point number, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_number_float()` for all - JSON types.,is_number_float} - - @sa @ref is_number() -- check if value is number - @sa @ref is_number_integer() -- check if value is an integer number - @sa @ref is_number_unsigned() -- check if value is an unsigned integer - number - - @since version 1.0.0 - */ - constexpr bool is_number_float() const noexcept - { - return m_type == value_t::number_float; - } - - /*! - @brief return whether value is an object - - This function returns true iff the JSON value is an object. - - @return `true` if type is object, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_object()` for all JSON - types.,is_object} - - @since version 1.0.0 - */ - constexpr bool is_object() const noexcept - { - return m_type == value_t::object; - } - - /*! - @brief return whether value is an array - - This function returns true iff the JSON value is an array. - - @return `true` if type is array, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_array()` for all JSON - types.,is_array} - - @since version 1.0.0 - */ - constexpr bool is_array() const noexcept - { - return m_type == value_t::array; - } - - /*! - @brief return whether value is a string - - This function returns true iff the JSON value is a string. - - @return `true` if type is string, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. 
- - @liveexample{The following code exemplifies `is_string()` for all JSON - types.,is_string} - - @since version 1.0.0 - */ - constexpr bool is_string() const noexcept - { - return m_type == value_t::string; - } - - /*! - @brief return whether value is discarded - - This function returns true iff the JSON value was discarded during parsing - with a callback function (see @ref parser_callback_t). - - @note This function will always be `false` for JSON values after parsing. - That is, discarded values can only occur during parsing, but will be - removed when inside a structured value or replaced by null in other cases. - - @return `true` if type is discarded, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_discarded()` for all JSON - types.,is_discarded} - - @since version 1.0.0 - */ - constexpr bool is_discarded() const noexcept - { - return m_type == value_t::discarded; - } - - /*! - @brief return the type of the JSON value (implicit) - - Implicitly return the type of the JSON value as a value from the @ref - value_t enumeration. - - @return the type of the JSON value - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies the @ref value_t operator for - all JSON types.,operator__value_t} - - @since version 1.0.0 - */ - constexpr operator value_t() const noexcept - { - return m_type; - } - - /// @} - - private: - ////////////////// - // value access // - ////////////////// - - /// get a boolean (explicit) - boolean_t get_impl(boolean_t* /*unused*/) const - { - if (is_boolean()) - { - return m_value.boolean; - } - - JSON_THROW(type_error(302, "type must be boolean, but is " + type_name())); - } - - /// get a pointer to the value (object) - object_t* get_impl_ptr(object_t* /*unused*/) noexcept - { - return is_object() ? 
m_value.object : nullptr; - } - - /// get a pointer to the value (object) - constexpr const object_t* get_impl_ptr(const object_t* /*unused*/) const noexcept - { - return is_object() ? m_value.object : nullptr; - } - - /// get a pointer to the value (array) - array_t* get_impl_ptr(array_t* /*unused*/) noexcept - { - return is_array() ? m_value.array : nullptr; - } - - /// get a pointer to the value (array) - constexpr const array_t* get_impl_ptr(const array_t* /*unused*/) const noexcept - { - return is_array() ? m_value.array : nullptr; - } - - /// get a pointer to the value (string) - string_t* get_impl_ptr(string_t* /*unused*/) noexcept - { - return is_string() ? m_value.string : nullptr; - } - - /// get a pointer to the value (string) - constexpr const string_t* get_impl_ptr(const string_t* /*unused*/) const noexcept - { - return is_string() ? m_value.string : nullptr; - } - - /// get a pointer to the value (boolean) - boolean_t* get_impl_ptr(boolean_t* /*unused*/) noexcept - { - return is_boolean() ? &m_value.boolean : nullptr; - } - - /// get a pointer to the value (boolean) - constexpr const boolean_t* get_impl_ptr(const boolean_t* /*unused*/) const noexcept - { - return is_boolean() ? &m_value.boolean : nullptr; - } - - /// get a pointer to the value (integer number) - number_integer_t* get_impl_ptr(number_integer_t* /*unused*/) noexcept - { - return is_number_integer() ? &m_value.number_integer : nullptr; - } - - /// get a pointer to the value (integer number) - constexpr const number_integer_t* get_impl_ptr(const number_integer_t* /*unused*/) const noexcept - { - return is_number_integer() ? &m_value.number_integer : nullptr; - } - - /// get a pointer to the value (unsigned number) - number_unsigned_t* get_impl_ptr(number_unsigned_t* /*unused*/) noexcept - { - return is_number_unsigned() ? 
&m_value.number_unsigned : nullptr; - } - - /// get a pointer to the value (unsigned number) - constexpr const number_unsigned_t* get_impl_ptr(const number_unsigned_t* /*unused*/) const noexcept - { - return is_number_unsigned() ? &m_value.number_unsigned : nullptr; - } - - /// get a pointer to the value (floating-point number) - number_float_t* get_impl_ptr(number_float_t* /*unused*/) noexcept - { - return is_number_float() ? &m_value.number_float : nullptr; - } - - /// get a pointer to the value (floating-point number) - constexpr const number_float_t* get_impl_ptr(const number_float_t* /*unused*/) const noexcept - { - return is_number_float() ? &m_value.number_float : nullptr; - } - - /*! - @brief helper function to implement get_ref() - - This funcion helps to implement get_ref() without code duplication for - const and non-const overloads - - @tparam ThisType will be deduced as `basic_json` or `const basic_json` - - @throw type_error.303 if ReferenceType does not match underlying value - type of the current JSON - */ - template - static ReferenceType get_ref_impl(ThisType& obj) - { - // helper type - using PointerType = typename std::add_pointer::type; - - // delegate the call to get_ptr<>() - auto ptr = obj.template get_ptr(); - - if (ptr != nullptr) - { - return *ptr; - } - - JSON_THROW(type_error(303, "incompatible ReferenceType for get_ref, actual type is " + obj.type_name())); - } - - public: - /// @name value access - /// Direct access to the stored value of a JSON value. - /// @{ - - /*! - @brief get special-case overload - - This overloads avoids a lot of template boilerplate, it can be seen as the - identity method - - @tparam BasicJsonType == @ref basic_json - - @return a copy of *this - - @complexity Constant. - - @since version 2.1.0 - */ - template < - typename BasicJsonType, - detail::enable_if_t::type, - basic_json_t>::value, - int> = 0 > - basic_json get() const - { - return *this; - } - - /*! 
- @brief get a value (explicit) - - Explicit type conversion between the JSON value and a compatible value - which is [CopyConstructible](http://en.cppreference.com/w/cpp/concept/CopyConstructible) - and [DefaultConstructible](http://en.cppreference.com/w/cpp/concept/DefaultConstructible). - The value is converted by calling the @ref json_serializer - `from_json()` method. - - The function is equivalent to executing - @code {.cpp} - ValueType ret; - JSONSerializer::from_json(*this, ret); - return ret; - @endcode - - This overloads is chosen if: - - @a ValueType is not @ref basic_json, - - @ref json_serializer has a `from_json()` method of the form - `void from_json(const @ref basic_json&, ValueType&)`, and - - @ref json_serializer does not have a `from_json()` method of - the form `ValueType from_json(const @ref basic_json&)` - - @tparam ValueTypeCV the provided value type - @tparam ValueType the returned value type - - @return copy of the JSON value, converted to @a ValueType - - @throw what @ref json_serializer `from_json()` method throws - - @liveexample{The example below shows several conversions from JSON values - to other types. 
There a few things to note: (1) Floating-point numbers can - be converted to integers\, (2) A JSON array can be converted to a standard - `std::vector`\, (3) A JSON object can be converted to C++ - associative containers such as `std::unordered_map`.,get__ValueType_const} - - @since version 2.1.0 - */ - template < - typename ValueTypeCV, - typename ValueType = detail::uncvref_t, - detail::enable_if_t < - not std::is_same::value and - detail::has_from_json::value and - not detail::has_non_default_from_json::value, - int > = 0 > - ValueType get() const noexcept(noexcept( - JSONSerializer::from_json(std::declval(), std::declval()))) - { - // we cannot static_assert on ValueTypeCV being non-const, because - // there is support for get(), which is why we - // still need the uncvref - static_assert(not std::is_reference::value, - "get() cannot be used with reference types, you might want to use get_ref()"); - static_assert(std::is_default_constructible::value, - "types must be DefaultConstructible when used with get()"); - - ValueType ret; - JSONSerializer::from_json(*this, ret); - return ret; - } - - /*! - @brief get a value (explicit); special case - - Explicit type conversion between the JSON value and a compatible value - which is **not** [CopyConstructible](http://en.cppreference.com/w/cpp/concept/CopyConstructible) - and **not** [DefaultConstructible](http://en.cppreference.com/w/cpp/concept/DefaultConstructible). - The value is converted by calling the @ref json_serializer - `from_json()` method. - - The function is equivalent to executing - @code {.cpp} - return JSONSerializer::from_json(*this); - @endcode - - This overloads is chosen if: - - @a ValueType is not @ref basic_json and - - @ref json_serializer has a `from_json()` method of the form - `ValueType from_json(const @ref basic_json&)` - - @note If @ref json_serializer has both overloads of - `from_json()`, this one is chosen. 
- - @tparam ValueTypeCV the provided value type - @tparam ValueType the returned value type - - @return copy of the JSON value, converted to @a ValueType - - @throw what @ref json_serializer `from_json()` method throws - - @since version 2.1.0 - */ - template < - typename ValueTypeCV, - typename ValueType = detail::uncvref_t, - detail::enable_if_t::value and - detail::has_non_default_from_json::value, int> = 0 > - ValueType get() const noexcept(noexcept( - JSONSerializer::from_json(std::declval()))) - { - static_assert(not std::is_reference::value, - "get() cannot be used with reference types, you might want to use get_ref()"); - return JSONSerializer::from_json(*this); - } - - /*! - @brief get a pointer value (explicit) - - Explicit pointer access to the internally stored JSON value. No copies are - made. - - @warning The pointer becomes invalid if the underlying JSON object - changes. - - @tparam PointerType pointer type; must be a pointer to @ref array_t, @ref - object_t, @ref string_t, @ref boolean_t, @ref number_integer_t, - @ref number_unsigned_t, or @ref number_float_t. - - @return pointer to the internally stored JSON value if the requested - pointer type @a PointerType fits to the JSON value; `nullptr` otherwise - - @complexity Constant. - - @liveexample{The example below shows how pointers to internal values of a - JSON value can be requested. Note that no type conversions are made and a - `nullptr` is returned if the value and the requested pointer type does not - match.,get__PointerType} - - @sa @ref get_ptr() for explicit pointer-member access - - @since version 1.0.0 - */ - template::value, int>::type = 0> - PointerType get() noexcept - { - // delegate the call to get_ptr - return get_ptr(); - } - - /*! - @brief get a pointer value (explicit) - @copydoc get() - */ - template::value, int>::type = 0> - constexpr const PointerType get() const noexcept - { - // delegate the call to get_ptr - return get_ptr(); - } - - /*! 
- @brief get a pointer value (implicit) - - Implicit pointer access to the internally stored JSON value. No copies are - made. - - @warning Writing data to the pointee of the result yields an undefined - state. - - @tparam PointerType pointer type; must be a pointer to @ref array_t, @ref - object_t, @ref string_t, @ref boolean_t, @ref number_integer_t, - @ref number_unsigned_t, or @ref number_float_t. Enforced by a static - assertion. - - @return pointer to the internally stored JSON value if the requested - pointer type @a PointerType fits to the JSON value; `nullptr` otherwise - - @complexity Constant. - - @liveexample{The example below shows how pointers to internal values of a - JSON value can be requested. Note that no type conversions are made and a - `nullptr` is returned if the value and the requested pointer type does not - match.,get_ptr} - - @since version 1.0.0 - */ - template::value, int>::type = 0> - PointerType get_ptr() noexcept - { - // get the type of the PointerType (remove pointer and const) - using pointee_t = typename std::remove_const::type>::type>::type; - // make sure the type matches the allowed types - static_assert( - std::is_same::value - or std::is_same::value - or std::is_same::value - or std::is_same::value - or std::is_same::value - or std::is_same::value - or std::is_same::value - , "incompatible pointer type"); - - // delegate the call to get_impl_ptr<>() - return get_impl_ptr(static_cast(nullptr)); - } - - /*! 
- @brief get a pointer value (implicit) - @copydoc get_ptr() - */ - template::value and - std::is_const::type>::value, int>::type = 0> - constexpr const PointerType get_ptr() const noexcept - { - // get the type of the PointerType (remove pointer and const) - using pointee_t = typename std::remove_const::type>::type>::type; - // make sure the type matches the allowed types - static_assert( - std::is_same::value - or std::is_same::value - or std::is_same::value - or std::is_same::value - or std::is_same::value - or std::is_same::value - or std::is_same::value - , "incompatible pointer type"); - - // delegate the call to get_impl_ptr<>() const - return get_impl_ptr(static_cast(nullptr)); - } - - /*! - @brief get a reference value (implicit) - - Implicit reference access to the internally stored JSON value. No copies - are made. - - @warning Writing data to the referee of the result yields an undefined - state. - - @tparam ReferenceType reference type; must be a reference to @ref array_t, - @ref object_t, @ref string_t, @ref boolean_t, @ref number_integer_t, or - @ref number_float_t. Enforced by static assertion. - - @return reference to the internally stored JSON value if the requested - reference type @a ReferenceType fits to the JSON value; throws - type_error.303 otherwise - - @throw type_error.303 in case passed type @a ReferenceType is incompatible - with the stored JSON value; see example below - - @complexity Constant. - - @liveexample{The example shows several calls to `get_ref()`.,get_ref} - - @since version 1.1.0 - */ - template::value, int>::type = 0> - ReferenceType get_ref() - { - // delegate call to get_ref_impl - return get_ref_impl(*this); - } - - /*! - @brief get a reference value (implicit) - @copydoc get_ref() - */ - template::value and - std::is_const::type>::value, int>::type = 0> - ReferenceType get_ref() const - { - // delegate call to get_ref_impl - return get_ref_impl(*this); - } - - /*! 
- @brief get a value (implicit) - - Implicit type conversion between the JSON value and a compatible value. - The call is realized by calling @ref get() const. - - @tparam ValueType non-pointer type compatible to the JSON value, for - instance `int` for JSON integer numbers, `bool` for JSON booleans, or - `std::vector` types for JSON arrays. The character type of @ref string_t - as well as an initializer list of this type is excluded to avoid - ambiguities as these types implicitly convert to `std::string`. - - @return copy of the JSON value, converted to type @a ValueType - - @throw type_error.302 in case passed type @a ValueType is incompatible - to the JSON value type (e.g., the JSON value is of type boolean, but a - string is requested); see example below - - @complexity Linear in the size of the JSON value. - - @liveexample{The example below shows several conversions from JSON values - to other types. There a few things to note: (1) Floating-point numbers can - be converted to integers\, (2) A JSON array can be converted to a standard - `std::vector`\, (3) A JSON object can be converted to C++ - associative containers such as `std::unordered_map`.,operator__ValueType} - - @since version 1.0.0 - */ - template < typename ValueType, typename std::enable_if < - not std::is_pointer::value and - not std::is_same::value -#ifndef _MSC_VER // fix for issue #167 operator<< ambiguity under VS2015 - and not std::is_same>::value -#endif - , int >::type = 0 > - operator ValueType() const - { - // delegate the call to get<>() const - return get(); - } - - /// @} - - - //////////////////// - // element access // - //////////////////// - - /// @name element access - /// Access to the JSON value. - /// @{ - - /*! - @brief access specified array element with bounds checking - - Returns a reference to the element at specified location @a idx, with - bounds checking. 
- - @param[in] idx index of the element to access - - @return reference to the element at index @a idx - - @throw type_error.304 if the JSON value is not an array; in this case, - calling `at` with an index makes no sense. See example below. - @throw out_of_range.401 if the index @a idx is out of range of the array; - that is, `idx >= size()`. See example below. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Constant. - - @since version 1.0.0 - - @liveexample{The example below shows how array elements can be read and - written using `at()`. It also demonstrates the different exceptions that - can be thrown.,at__size_type} - */ - reference at(size_type idx) - { - // at only works for arrays - if (is_array()) - { - JSON_TRY - { - return m_value.array->at(idx); - } - JSON_CATCH (std::out_of_range&) - { - // create better exception explanation - JSON_THROW(out_of_range(401, "array index " + std::to_string(idx) + " is out of range")); - } - } - else - { - JSON_THROW(type_error(304, "cannot use at() with " + type_name())); - } - } - - /*! - @brief access specified array element with bounds checking - - Returns a const reference to the element at specified location @a idx, - with bounds checking. - - @param[in] idx index of the element to access - - @return const reference to the element at index @a idx - - @throw type_error.304 if the JSON value is not an array; in this case, - calling `at` with an index makes no sense. See example below. - @throw out_of_range.401 if the index @a idx is out of range of the array; - that is, `idx >= size()`. See example below. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Constant. - - @since version 1.0.0 - - @liveexample{The example below shows how array elements can be read using - `at()`. 
It also demonstrates the different exceptions that can be thrown., - at__size_type_const} - */ - const_reference at(size_type idx) const - { - // at only works for arrays - if (is_array()) - { - JSON_TRY - { - return m_value.array->at(idx); - } - JSON_CATCH (std::out_of_range&) - { - // create better exception explanation - JSON_THROW(out_of_range(401, "array index " + std::to_string(idx) + " is out of range")); - } - } - else - { - JSON_THROW(type_error(304, "cannot use at() with " + type_name())); - } - } - - /*! - @brief access specified object element with bounds checking - - Returns a reference to the element at with specified key @a key, with - bounds checking. - - @param[in] key key of the element to access - - @return reference to the element at key @a key - - @throw type_error.304 if the JSON value is not an object; in this case, - calling `at` with a key makes no sense. See example below. - @throw out_of_range.403 if the key @a key is is not stored in the object; - that is, `find(key) == end()`. See example below. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Logarithmic in the size of the container. - - @sa @ref operator[](const typename object_t::key_type&) for unchecked - access by reference - @sa @ref value() for access by value with a default value - - @since version 1.0.0 - - @liveexample{The example below shows how object elements can be read and - written using `at()`. It also demonstrates the different exceptions that - can be thrown.,at__object_t_key_type} - */ - reference at(const typename object_t::key_type& key) - { - // at only works for objects - if (is_object()) - { - JSON_TRY - { - return m_value.object->at(key); - } - JSON_CATCH (std::out_of_range&) - { - // create better exception explanation - JSON_THROW(out_of_range(403, "key '" + key + "' not found")); - } - } - else - { - JSON_THROW(type_error(304, "cannot use at() with " + type_name())); - } - } - - /*! 
- @brief access specified object element with bounds checking - - Returns a const reference to the element at with specified key @a key, - with bounds checking. - - @param[in] key key of the element to access - - @return const reference to the element at key @a key - - @throw type_error.304 if the JSON value is not an object; in this case, - calling `at` with a key makes no sense. See example below. - @throw out_of_range.403 if the key @a key is is not stored in the object; - that is, `find(key) == end()`. See example below. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Logarithmic in the size of the container. - - @sa @ref operator[](const typename object_t::key_type&) for unchecked - access by reference - @sa @ref value() for access by value with a default value - - @since version 1.0.0 - - @liveexample{The example below shows how object elements can be read using - `at()`. It also demonstrates the different exceptions that can be thrown., - at__object_t_key_type_const} - */ - const_reference at(const typename object_t::key_type& key) const - { - // at only works for objects - if (is_object()) - { - JSON_TRY - { - return m_value.object->at(key); - } - JSON_CATCH (std::out_of_range&) - { - // create better exception explanation - JSON_THROW(out_of_range(403, "key '" + key + "' not found")); - } - } - else - { - JSON_THROW(type_error(304, "cannot use at() with " + type_name())); - } - } - - /*! - @brief access specified array element - - Returns a reference to the element at specified location @a idx. - - @note If @a idx is beyond the range of the array (i.e., `idx >= size()`), - then the array is silently filled up with `null` values to make `idx` a - valid reference to the last stored element. 
- - @param[in] idx index of the element to access - - @return reference to the element at index @a idx - - @throw type_error.305 if the JSON value is not an array or null; in that - cases, using the [] operator with an index makes no sense. - - @complexity Constant if @a idx is in the range of the array. Otherwise - linear in `idx - size()`. - - @liveexample{The example below shows how array elements can be read and - written using `[]` operator. Note the addition of `null` - values.,operatorarray__size_type} - - @since version 1.0.0 - */ - reference operator[](size_type idx) - { - // implicitly convert null value to an empty array - if (is_null()) - { - m_type = value_t::array; - m_value.array = create(); - assert_invariant(); - } - - // operator[] only works for arrays - if (is_array()) - { - // fill up array with null values if given idx is outside range - if (idx >= m_value.array->size()) - { - m_value.array->insert(m_value.array->end(), - idx - m_value.array->size() + 1, - basic_json()); - } - - return m_value.array->operator[](idx); - } - - JSON_THROW(type_error(305, "cannot use operator[] with " + type_name())); - } - - /*! - @brief access specified array element - - Returns a const reference to the element at specified location @a idx. - - @param[in] idx index of the element to access - - @return const reference to the element at index @a idx - - @throw type_error.305 if the JSON value is not an array; in that cases, - using the [] operator with an index makes no sense. - - @complexity Constant. - - @liveexample{The example below shows how array elements can be read using - the `[]` operator.,operatorarray__size_type_const} - - @since version 1.0.0 - */ - const_reference operator[](size_type idx) const - { - // const operator[] only works for arrays - if (is_array()) - { - return m_value.array->operator[](idx); - } - - JSON_THROW(type_error(305, "cannot use operator[] with " + type_name())); - } - - /*! 
- @brief access specified object element - - Returns a reference to the element at with specified key @a key. - - @note If @a key is not found in the object, then it is silently added to - the object and filled with a `null` value to make `key` a valid reference. - In case the value was `null` before, it is converted to an object. - - @param[in] key key of the element to access - - @return reference to the element at key @a key - - @throw type_error.305 if the JSON value is not an object or null; in that - cases, using the [] operator with a key makes no sense. - - @complexity Logarithmic in the size of the container. - - @liveexample{The example below shows how object elements can be read and - written using the `[]` operator.,operatorarray__key_type} - - @sa @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa @ref value() for access by value with a default value - - @since version 1.0.0 - */ - reference operator[](const typename object_t::key_type& key) - { - // implicitly convert null value to an empty object - if (is_null()) - { - m_type = value_t::object; - m_value.object = create(); - assert_invariant(); - } - - // operator[] only works for objects - if (is_object()) - { - return m_value.object->operator[](key); - } - - JSON_THROW(type_error(305, "cannot use operator[] with " + type_name())); - } - - /*! - @brief read-only access specified object element - - Returns a const reference to the element at with specified key @a key. No - bounds checking is performed. - - @warning If the element with key @a key does not exist, the behavior is - undefined. - - @param[in] key key of the element to access - - @return const reference to the element at key @a key - - @pre The element with key @a key must exist. **This precondition is - enforced with an assertion.** - - @throw type_error.305 if the JSON value is not an object; in that cases, - using the [] operator with a key makes no sense. 
- - @complexity Logarithmic in the size of the container. - - @liveexample{The example below shows how object elements can be read using - the `[]` operator.,operatorarray__key_type_const} - - @sa @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa @ref value() for access by value with a default value - - @since version 1.0.0 - */ - const_reference operator[](const typename object_t::key_type& key) const - { - // const operator[] only works for objects - if (is_object()) - { - assert(m_value.object->find(key) != m_value.object->end()); - return m_value.object->find(key)->second; - } - - JSON_THROW(type_error(305, "cannot use operator[] with " + type_name())); - } - - /*! - @brief access specified object element - - Returns a reference to the element at with specified key @a key. - - @note If @a key is not found in the object, then it is silently added to - the object and filled with a `null` value to make `key` a valid reference. - In case the value was `null` before, it is converted to an object. - - @param[in] key key of the element to access - - @return reference to the element at key @a key - - @throw type_error.305 if the JSON value is not an object or null; in that - cases, using the [] operator with a key makes no sense. - - @complexity Logarithmic in the size of the container. - - @liveexample{The example below shows how object elements can be read and - written using the `[]` operator.,operatorarray__key_type} - - @sa @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa @ref value() for access by value with a default value - - @since version 1.0.0 - */ - template - reference operator[](T * (&key)[n]) - { - return operator[](static_cast(key)); - } - - /*! - @brief read-only access specified object element - - Returns a const reference to the element at with specified key @a key. No - bounds checking is performed. 
- - @warning If the element with key @a key does not exist, the behavior is - undefined. - - @note This function is required for compatibility reasons with Clang. - - @param[in] key key of the element to access - - @return const reference to the element at key @a key - - @throw type_error.305 if the JSON value is not an object; in that cases, - using the [] operator with a key makes no sense. - - @complexity Logarithmic in the size of the container. - - @liveexample{The example below shows how object elements can be read using - the `[]` operator.,operatorarray__key_type_const} - - @sa @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa @ref value() for access by value with a default value - - @since version 1.0.0 - */ - template - const_reference operator[](T * (&key)[n]) const - { - return operator[](static_cast(key)); - } - - /*! - @brief access specified object element - - Returns a reference to the element at with specified key @a key. - - @note If @a key is not found in the object, then it is silently added to - the object and filled with a `null` value to make `key` a valid reference. - In case the value was `null` before, it is converted to an object. - - @param[in] key key of the element to access - - @return reference to the element at key @a key - - @throw type_error.305 if the JSON value is not an object or null; in that - cases, using the [] operator with a key makes no sense. - - @complexity Logarithmic in the size of the container. 
- - @liveexample{The example below shows how object elements can be read and - written using the `[]` operator.,operatorarray__key_type} - - @sa @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa @ref value() for access by value with a default value - - @since version 1.1.0 - */ - template - reference operator[](T* key) - { - // implicitly convert null to object - if (is_null()) - { - m_type = value_t::object; - m_value = value_t::object; - assert_invariant(); - } - - // at only works for objects - if (is_object()) - { - return m_value.object->operator[](key); - } - - JSON_THROW(type_error(305, "cannot use operator[] with " + type_name())); - } - - /*! - @brief read-only access specified object element - - Returns a const reference to the element at with specified key @a key. No - bounds checking is performed. - - @warning If the element with key @a key does not exist, the behavior is - undefined. - - @param[in] key key of the element to access - - @return const reference to the element at key @a key - - @pre The element with key @a key must exist. **This precondition is - enforced with an assertion.** - - @throw type_error.305 if the JSON value is not an object; in that cases, - using the [] operator with a key makes no sense. - - @complexity Logarithmic in the size of the container. - - @liveexample{The example below shows how object elements can be read using - the `[]` operator.,operatorarray__key_type_const} - - @sa @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa @ref value() for access by value with a default value - - @since version 1.1.0 - */ - template - const_reference operator[](T* key) const - { - // at only works for objects - if (is_object()) - { - assert(m_value.object->find(key) != m_value.object->end()); - return m_value.object->find(key)->second; - } - - JSON_THROW(type_error(305, "cannot use operator[] with " + type_name())); - } - - /*! 
- @brief access specified object element with default value - - Returns either a copy of an object's element at the specified key @a key - or a given default value if no element with key @a key exists. - - The function is basically equivalent to executing - @code {.cpp} - try { - return at(key); - } catch(out_of_range) { - return default_value; - } - @endcode - - @note Unlike @ref at(const typename object_t::key_type&), this function - does not throw if the given key @a key was not found. - - @note Unlike @ref operator[](const typename object_t::key_type& key), this - function does not implicitly add an element to the position defined by @a - key. This function is furthermore also applicable to const objects. - - @param[in] key key of the element to access - @param[in] default_value the value to return if @a key is not found - - @tparam ValueType type compatible to JSON values, for instance `int` for - JSON integer numbers, `bool` for JSON booleans, or `std::vector` types for - JSON arrays. Note the type of the expected value at @a key and the default - value @a default_value must be compatible. - - @return copy of the element at key @a key or @a default_value if @a key - is not found - - @throw type_error.306 if the JSON value is not an objec; in that cases, - using `value()` with a key makes no sense. - - @complexity Logarithmic in the size of the container. 
- - @liveexample{The example below shows how object elements can be queried - with a default value.,basic_json__value} - - @sa @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa @ref operator[](const typename object_t::key_type&) for unchecked - access by reference - - @since version 1.0.0 - */ - template::value, int>::type = 0> - ValueType value(const typename object_t::key_type& key, ValueType default_value) const - { - // at only works for objects - if (is_object()) - { - // if key is found, return value and given default value otherwise - const auto it = find(key); - if (it != end()) - { - return *it; - } - - return default_value; - } - else - { - JSON_THROW(type_error(306, "cannot use value() with " + type_name())); - } - } - - /*! - @brief overload for a default value of type const char* - @copydoc basic_json::value(const typename object_t::key_type&, ValueType) const - */ - string_t value(const typename object_t::key_type& key, const char* default_value) const - { - return value(key, string_t(default_value)); - } - - /*! - @brief access specified object element via JSON Pointer with default value - - Returns either a copy of an object's element at the specified key @a key - or a given default value if no element with key @a key exists. - - The function is basically equivalent to executing - @code {.cpp} - try { - return at(ptr); - } catch(out_of_range) { - return default_value; - } - @endcode - - @note Unlike @ref at(const json_pointer&), this function does not throw - if the given key @a key was not found. - - @param[in] ptr a JSON pointer to the element to access - @param[in] default_value the value to return if @a ptr found no value - - @tparam ValueType type compatible to JSON values, for instance `int` for - JSON integer numbers, `bool` for JSON booleans, or `std::vector` types for - JSON arrays. Note the type of the expected value at @a key and the default - value @a default_value must be compatible. 
- - @return copy of the element at key @a key or @a default_value if @a key - is not found - - @throw type_error.306 if the JSON value is not an objec; in that cases, - using `value()` with a key makes no sense. - - @complexity Logarithmic in the size of the container. - - @liveexample{The example below shows how object elements can be queried - with a default value.,basic_json__value_ptr} - - @sa @ref operator[](const json_pointer&) for unchecked access by reference - - @since version 2.0.2 - */ - template::value, int>::type = 0> - ValueType value(const json_pointer& ptr, ValueType default_value) const - { - // at only works for objects - if (is_object()) - { - // if pointer resolves a value, return it or use default value - JSON_TRY - { - return ptr.get_checked(this); - } - JSON_CATCH (out_of_range&) - { - return default_value; - } - } - - JSON_THROW(type_error(306, "cannot use value() with " + type_name())); - } - - /*! - @brief overload for a default value of type const char* - @copydoc basic_json::value(const json_pointer&, ValueType) const - */ - string_t value(const json_pointer& ptr, const char* default_value) const - { - return value(ptr, string_t(default_value)); - } - - /*! - @brief access the first element - - Returns a reference to the first element in the container. For a JSON - container `c`, the expression `c.front()` is equivalent to `*c.begin()`. - - @return In case of a structured type (array or object), a reference to the - first element is returned. In case of number, string, or boolean values, a - reference to the value is returned. - - @complexity Constant. - - @pre The JSON value must not be `null` (would throw `std::out_of_range`) - or an empty array or object (undefined behavior, **guarded by - assertions**). - @post The JSON value remains unchanged. 
- - @throw invalid_iterator.214 when called on `null` value - - @liveexample{The following code shows an example for `front()`.,front} - - @sa @ref back() -- access the last element - - @since version 1.0.0 - */ - reference front() - { - return *begin(); - } - - /*! - @copydoc basic_json::front() - */ - const_reference front() const - { - return *cbegin(); - } - - /*! - @brief access the last element - - Returns a reference to the last element in the container. For a JSON - container `c`, the expression `c.back()` is equivalent to - @code {.cpp} - auto tmp = c.end(); - --tmp; - return *tmp; - @endcode - - @return In case of a structured type (array or object), a reference to the - last element is returned. In case of number, string, or boolean values, a - reference to the value is returned. - - @complexity Constant. - - @pre The JSON value must not be `null` (would throw `std::out_of_range`) - or an empty array or object (undefined behavior, **guarded by - assertions**). - @post The JSON value remains unchanged. - - @throw invalid_iterator.214 when called on a `null` value. See example - below. - - @liveexample{The following code shows an example for `back()`.,back} - - @sa @ref front() -- access the first element - - @since version 1.0.0 - */ - reference back() - { - auto tmp = end(); - --tmp; - return *tmp; - } - - /*! - @copydoc basic_json::back() - */ - const_reference back() const - { - auto tmp = cend(); - --tmp; - return *tmp; - } - - /*! - @brief remove element given an iterator - - Removes the element specified by iterator @a pos. The iterator @a pos must - be valid and dereferenceable. Thus the `end()` iterator (which is valid, - but is not dereferenceable) cannot be used as a value for @a pos. - - If called on a primitive type other than `null`, the resulting JSON value - will be `null`. - - @param[in] pos iterator to the element to remove - @return Iterator following the last removed element. 
If the iterator @a - pos refers to the last element, the `end()` iterator is returned. - - @tparam IteratorType an @ref iterator or @ref const_iterator - - @post Invalidates iterators and references at or after the point of the - erase, including the `end()` iterator. - - @throw type_error.307 if called on a `null` value; example: `"cannot use - erase() with null"` - @throw invalid_iterator.202 if called on an iterator which does not belong - to the current JSON value; example: `"iterator does not fit current - value"` - @throw invalid_iterator.205 if called on a primitive type with invalid - iterator (i.e., any iterator which is not `begin()`); example: `"iterator - out of range"` - - @complexity The complexity depends on the type: - - objects: amortized constant - - arrays: linear in distance between @a pos and the end of the container - - strings: linear in the length of the string - - other types: constant - - @liveexample{The example shows the result of `erase()` for different JSON - types.,erase__IteratorType} - - @sa @ref erase(IteratorType, IteratorType) -- removes the elements in - the given range - @sa @ref erase(const typename object_t::key_type&) -- removes the element - from an object at the given key - @sa @ref erase(const size_type) -- removes the element from an array at - the given index - - @since version 1.0.0 - */ - template::value or - std::is_same::value, int>::type - = 0> - IteratorType erase(IteratorType pos) - { - // make sure iterator fits the current value - if (this != pos.m_object) - { - JSON_THROW(invalid_iterator(202, "iterator does not fit current value")); - } - - IteratorType result = end(); - - switch (m_type) - { - case value_t::boolean: - case value_t::number_float: - case value_t::number_integer: - case value_t::number_unsigned: - case value_t::string: - { - if (not pos.m_it.primitive_iterator.is_begin()) - { - JSON_THROW(invalid_iterator(205, "iterator out of range")); - } - - if (is_string()) - { - AllocatorType alloc; - 
alloc.destroy(m_value.string); - alloc.deallocate(m_value.string, 1); - m_value.string = nullptr; - } - - m_type = value_t::null; - assert_invariant(); - break; - } - - case value_t::object: - { - result.m_it.object_iterator = m_value.object->erase(pos.m_it.object_iterator); - break; - } - - case value_t::array: - { - result.m_it.array_iterator = m_value.array->erase(pos.m_it.array_iterator); - break; - } - - default: - { - JSON_THROW(type_error(307, "cannot use erase() with " + type_name())); - } - } - - return result; - } - - /*! - @brief remove elements given an iterator range - - Removes the element specified by the range `[first; last)`. The iterator - @a first does not need to be dereferenceable if `first == last`: erasing - an empty range is a no-op. - - If called on a primitive type other than `null`, the resulting JSON value - will be `null`. - - @param[in] first iterator to the beginning of the range to remove - @param[in] last iterator past the end of the range to remove - @return Iterator following the last removed element. If the iterator @a - second refers to the last element, the `end()` iterator is returned. - - @tparam IteratorType an @ref iterator or @ref const_iterator - - @post Invalidates iterators and references at or after the point of the - erase, including the `end()` iterator. 
- - @throw type_error.307 if called on a `null` value; example: `"cannot use - erase() with null"` - @throw invalid_iterator.203 if called on iterators which does not belong - to the current JSON value; example: `"iterators do not fit current value"` - @throw invalid_iterator.204 if called on a primitive type with invalid - iterators (i.e., if `first != begin()` and `last != end()`); example: - `"iterators out of range"` - - @complexity The complexity depends on the type: - - objects: `log(size()) + std::distance(first, last)` - - arrays: linear in the distance between @a first and @a last, plus linear - in the distance between @a last and end of the container - - strings: linear in the length of the string - - other types: constant - - @liveexample{The example shows the result of `erase()` for different JSON - types.,erase__IteratorType_IteratorType} - - @sa @ref erase(IteratorType) -- removes the element at a given position - @sa @ref erase(const typename object_t::key_type&) -- removes the element - from an object at the given key - @sa @ref erase(const size_type) -- removes the element from an array at - the given index - - @since version 1.0.0 - */ - template::value or - std::is_same::value, int>::type - = 0> - IteratorType erase(IteratorType first, IteratorType last) - { - // make sure iterator fits the current value - if (this != first.m_object or this != last.m_object) - { - JSON_THROW(invalid_iterator(203, "iterators do not fit current value")); - } - - IteratorType result = end(); - - switch (m_type) - { - case value_t::boolean: - case value_t::number_float: - case value_t::number_integer: - case value_t::number_unsigned: - case value_t::string: - { - if (not first.m_it.primitive_iterator.is_begin() or not last.m_it.primitive_iterator.is_end()) - { - JSON_THROW(invalid_iterator(204, "iterators out of range")); - } - - if (is_string()) - { - AllocatorType alloc; - alloc.destroy(m_value.string); - alloc.deallocate(m_value.string, 1); - m_value.string = 
nullptr; - } - - m_type = value_t::null; - assert_invariant(); - break; - } - - case value_t::object: - { - result.m_it.object_iterator = m_value.object->erase(first.m_it.object_iterator, - last.m_it.object_iterator); - break; - } - - case value_t::array: - { - result.m_it.array_iterator = m_value.array->erase(first.m_it.array_iterator, - last.m_it.array_iterator); - break; - } - - default: - { - JSON_THROW(type_error(307, "cannot use erase() with " + type_name())); - } - } - - return result; - } - - /*! - @brief remove element from a JSON object given a key - - Removes elements from a JSON object with the key value @a key. - - @param[in] key value of the elements to remove - - @return Number of elements removed. If @a ObjectType is the default - `std::map` type, the return value will always be `0` (@a key was not - found) or `1` (@a key was found). - - @post References and iterators to the erased elements are invalidated. - Other references and iterators are not affected. - - @throw type_error.307 when called on a type other than JSON object; - example: `"cannot use erase() with null"` - - @complexity `log(size()) + count(key)` - - @liveexample{The example shows the effect of `erase()`.,erase__key_type} - - @sa @ref erase(IteratorType) -- removes the element at a given position - @sa @ref erase(IteratorType, IteratorType) -- removes the elements in - the given range - @sa @ref erase(const size_type) -- removes the element from an array at - the given index - - @since version 1.0.0 - */ - size_type erase(const typename object_t::key_type& key) - { - // this erase only works for objects - if (is_object()) - { - return m_value.object->erase(key); - } - - JSON_THROW(type_error(307, "cannot use erase() with " + type_name())); - } - - /*! - @brief remove element from a JSON array given an index - - Removes element from a JSON array at the index @a idx. 
- - @param[in] idx index of the element to remove - - @throw type_error.307 when called on a type other than JSON object; - example: `"cannot use erase() with null"` - @throw out_of_range.401 when `idx >= size()`; example: `"array index 17 - is out of range"` - - @complexity Linear in distance between @a idx and the end of the container. - - @liveexample{The example shows the effect of `erase()`.,erase__size_type} - - @sa @ref erase(IteratorType) -- removes the element at a given position - @sa @ref erase(IteratorType, IteratorType) -- removes the elements in - the given range - @sa @ref erase(const typename object_t::key_type&) -- removes the element - from an object at the given key - - @since version 1.0.0 - */ - void erase(const size_type idx) - { - // this erase only works for arrays - if (is_array()) - { - if (idx >= size()) - { - JSON_THROW(out_of_range(401, "array index " + std::to_string(idx) + " is out of range")); - } - - m_value.array->erase(m_value.array->begin() + static_cast(idx)); - } - else - { - JSON_THROW(type_error(307, "cannot use erase() with " + type_name())); - } - } - - /// @} - - - //////////// - // lookup // - //////////// - - /// @name lookup - /// @{ - - /*! - @brief find an element in a JSON object - - Finds an element in a JSON object with key equivalent to @a key. If the - element is not found or the JSON value is not an object, end() is - returned. - - @note This method always returns @ref end() when executed on a JSON type - that is not an object. - - @param[in] key key value of the element to search for - - @return Iterator to an element with key equivalent to @a key. If no such - element is found or the JSON value is not an object, past-the-end (see - @ref end()) iterator is returned. - - @complexity Logarithmic in the size of the JSON object. 
- - @liveexample{The example shows how `find()` is used.,find__key_type} - - @since version 1.0.0 - */ - iterator find(typename object_t::key_type key) - { - auto result = end(); - - if (is_object()) - { - result.m_it.object_iterator = m_value.object->find(key); - } - - return result; - } - - /*! - @brief find an element in a JSON object - @copydoc find(typename object_t::key_type) - */ - const_iterator find(typename object_t::key_type key) const - { - auto result = cend(); - - if (is_object()) - { - result.m_it.object_iterator = m_value.object->find(key); - } - - return result; - } - - /*! - @brief returns the number of occurrences of a key in a JSON object - - Returns the number of elements with key @a key. If ObjectType is the - default `std::map` type, the return value will always be `0` (@a key was - not found) or `1` (@a key was found). - - @note This method always returns `0` when executed on a JSON type that is - not an object. - - @param[in] key key value of the element to count - - @return Number of elements with key @a key. If the JSON value is not an - object, the return value will be `0`. - - @complexity Logarithmic in the size of the JSON object. - - @liveexample{The example shows how `count()` is used.,count} - - @since version 1.0.0 - */ - size_type count(typename object_t::key_type key) const - { - // return 0 for all nonobject types - return is_object() ? m_value.object->count(key) : 0; - } - - /// @} - - - /////////////// - // iterators // - /////////////// - - /// @name iterators - /// @{ - - /*! - @brief returns an iterator to the first element - - Returns an iterator to the first element. - - @image html range-begin-end.svg "Illustration from cppreference.com" - - @return iterator to the first element - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [Container](http://en.cppreference.com/w/cpp/concept/Container) - requirements: - - The complexity is constant. 
- - @liveexample{The following code shows an example for `begin()`.,begin} - - @sa @ref cbegin() -- returns a const iterator to the beginning - @sa @ref end() -- returns an iterator to the end - @sa @ref cend() -- returns a const iterator to the end - - @since version 1.0.0 - */ - iterator begin() noexcept - { - iterator result(this); - result.set_begin(); - return result; - } - - /*! - @copydoc basic_json::cbegin() - */ - const_iterator begin() const noexcept - { - return cbegin(); - } - - /*! - @brief returns a const iterator to the first element - - Returns a const iterator to the first element. - - @image html range-begin-end.svg "Illustration from cppreference.com" - - @return const iterator to the first element - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [Container](http://en.cppreference.com/w/cpp/concept/Container) - requirements: - - The complexity is constant. - - Has the semantics of `const_cast(*this).begin()`. - - @liveexample{The following code shows an example for `cbegin()`.,cbegin} - - @sa @ref begin() -- returns an iterator to the beginning - @sa @ref end() -- returns an iterator to the end - @sa @ref cend() -- returns a const iterator to the end - - @since version 1.0.0 - */ - const_iterator cbegin() const noexcept - { - const_iterator result(this); - result.set_begin(); - return result; - } - - /*! - @brief returns an iterator to one past the last element - - Returns an iterator to one past the last element. - - @image html range-begin-end.svg "Illustration from cppreference.com" - - @return iterator one past the last element - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [Container](http://en.cppreference.com/w/cpp/concept/Container) - requirements: - - The complexity is constant. 
- - @liveexample{The following code shows an example for `end()`.,end} - - @sa @ref cend() -- returns a const iterator to the end - @sa @ref begin() -- returns an iterator to the beginning - @sa @ref cbegin() -- returns a const iterator to the beginning - - @since version 1.0.0 - */ - iterator end() noexcept - { - iterator result(this); - result.set_end(); - return result; - } - - /*! - @copydoc basic_json::cend() - */ - const_iterator end() const noexcept - { - return cend(); - } - - /*! - @brief returns a const iterator to one past the last element - - Returns a const iterator to one past the last element. - - @image html range-begin-end.svg "Illustration from cppreference.com" - - @return const iterator one past the last element - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [Container](http://en.cppreference.com/w/cpp/concept/Container) - requirements: - - The complexity is constant. - - Has the semantics of `const_cast(*this).end()`. - - @liveexample{The following code shows an example for `cend()`.,cend} - - @sa @ref end() -- returns an iterator to the end - @sa @ref begin() -- returns an iterator to the beginning - @sa @ref cbegin() -- returns a const iterator to the beginning - - @since version 1.0.0 - */ - const_iterator cend() const noexcept - { - const_iterator result(this); - result.set_end(); - return result; - } - - /*! - @brief returns an iterator to the reverse-beginning - - Returns an iterator to the reverse-beginning; that is, the last element. - - @image html range-rbegin-rend.svg "Illustration from cppreference.com" - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [ReversibleContainer](http://en.cppreference.com/w/cpp/concept/ReversibleContainer) - requirements: - - The complexity is constant. - - Has the semantics of `reverse_iterator(end())`. 
- - @liveexample{The following code shows an example for `rbegin()`.,rbegin} - - @sa @ref crbegin() -- returns a const reverse iterator to the beginning - @sa @ref rend() -- returns a reverse iterator to the end - @sa @ref crend() -- returns a const reverse iterator to the end - - @since version 1.0.0 - */ - reverse_iterator rbegin() noexcept - { - return reverse_iterator(end()); - } - - /*! - @copydoc basic_json::crbegin() - */ - const_reverse_iterator rbegin() const noexcept - { - return crbegin(); - } - - /*! - @brief returns an iterator to the reverse-end - - Returns an iterator to the reverse-end; that is, one before the first - element. - - @image html range-rbegin-rend.svg "Illustration from cppreference.com" - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [ReversibleContainer](http://en.cppreference.com/w/cpp/concept/ReversibleContainer) - requirements: - - The complexity is constant. - - Has the semantics of `reverse_iterator(begin())`. - - @liveexample{The following code shows an example for `rend()`.,rend} - - @sa @ref crend() -- returns a const reverse iterator to the end - @sa @ref rbegin() -- returns a reverse iterator to the beginning - @sa @ref crbegin() -- returns a const reverse iterator to the beginning - - @since version 1.0.0 - */ - reverse_iterator rend() noexcept - { - return reverse_iterator(begin()); - } - - /*! - @copydoc basic_json::crend() - */ - const_reverse_iterator rend() const noexcept - { - return crend(); - } - - /*! - @brief returns a const reverse iterator to the last element - - Returns a const iterator to the reverse-beginning; that is, the last - element. - - @image html range-rbegin-rend.svg "Illustration from cppreference.com" - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [ReversibleContainer](http://en.cppreference.com/w/cpp/concept/ReversibleContainer) - requirements: - - The complexity is constant. 
- - Has the semantics of `const_cast(*this).rbegin()`. - - @liveexample{The following code shows an example for `crbegin()`.,crbegin} - - @sa @ref rbegin() -- returns a reverse iterator to the beginning - @sa @ref rend() -- returns a reverse iterator to the end - @sa @ref crend() -- returns a const reverse iterator to the end - - @since version 1.0.0 - */ - const_reverse_iterator crbegin() const noexcept - { - return const_reverse_iterator(cend()); - } - - /*! - @brief returns a const reverse iterator to one before the first - - Returns a const reverse iterator to the reverse-end; that is, one before - the first element. - - @image html range-rbegin-rend.svg "Illustration from cppreference.com" - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [ReversibleContainer](http://en.cppreference.com/w/cpp/concept/ReversibleContainer) - requirements: - - The complexity is constant. - - Has the semantics of `const_cast(*this).rend()`. - - @liveexample{The following code shows an example for `crend()`.,crend} - - @sa @ref rend() -- returns a reverse iterator to the end - @sa @ref rbegin() -- returns a reverse iterator to the beginning - @sa @ref crbegin() -- returns a const reverse iterator to the beginning - - @since version 1.0.0 - */ - const_reverse_iterator crend() const noexcept - { - return const_reverse_iterator(cbegin()); - } - - private: - // forward declaration - template class iteration_proxy; - - public: - /*! - @brief wrapper to access iterator member functions in range-based for - - This function allows to access @ref iterator::key() and @ref - iterator::value() during range-based for loops. In these loops, a - reference to the JSON values is returned, so there is no access to the - underlying iterator. - - @note The name of this function is not yet final and may change in the - future. - */ - static iteration_proxy iterator_wrapper(reference cont) - { - return iteration_proxy(cont); - } - - /*! 
- @copydoc iterator_wrapper(reference) - */ - static iteration_proxy iterator_wrapper(const_reference cont) - { - return iteration_proxy(cont); - } - - /// @} - - - ////////////// - // capacity // - ////////////// - - /// @name capacity - /// @{ - - /*! - @brief checks whether the container is empty - - Checks if a JSON value has no elements. - - @return The return value depends on the different types and is - defined as follows: - Value type | return value - ----------- | ------------- - null | `true` - boolean | `false` - string | `false` - number | `false` - object | result of function `object_t::empty()` - array | result of function `array_t::empty()` - - @note This function does not return whether a string stored as JSON value - is empty - it returns whether the JSON container itself is empty which is - false in the case of a string. - - @complexity Constant, as long as @ref array_t and @ref object_t satisfy - the Container concept; that is, their `empty()` functions have constant - complexity. - - @requirement This function helps `basic_json` satisfying the - [Container](http://en.cppreference.com/w/cpp/concept/Container) - requirements: - - The complexity is constant. - - Has the semantics of `begin() == end()`. - - @liveexample{The following code uses `empty()` to check if a JSON - object contains any elements.,empty} - - @sa @ref size() -- returns the number of elements - - @since version 1.0.0 - */ - bool empty() const noexcept - { - switch (m_type) - { - case value_t::null: - { - // null values are empty - return true; - } - - case value_t::array: - { - // delegate call to array_t::empty() - return m_value.array->empty(); - } - - case value_t::object: - { - // delegate call to object_t::empty() - return m_value.object->empty(); - } - - default: - { - // all other types are nonempty - return false; - } - } - } - - /*! - @brief returns the number of elements - - Returns the number of elements in a JSON value. 
- - @return The return value depends on the different types and is - defined as follows: - Value type | return value - ----------- | ------------- - null | `0` - boolean | `1` - string | `1` - number | `1` - object | result of function object_t::size() - array | result of function array_t::size() - - @note This function does not return the length of a string stored as JSON - value - it returns the number of elements in the JSON value which is 1 in - the case of a string. - - @complexity Constant, as long as @ref array_t and @ref object_t satisfy - the Container concept; that is, their size() functions have constant - complexity. - - @requirement This function helps `basic_json` satisfying the - [Container](http://en.cppreference.com/w/cpp/concept/Container) - requirements: - - The complexity is constant. - - Has the semantics of `std::distance(begin(), end())`. - - @liveexample{The following code calls `size()` on the different value - types.,size} - - @sa @ref empty() -- checks whether the container is empty - @sa @ref max_size() -- returns the maximal number of elements - - @since version 1.0.0 - */ - size_type size() const noexcept - { - switch (m_type) - { - case value_t::null: - { - // null values are empty - return 0; - } - - case value_t::array: - { - // delegate call to array_t::size() - return m_value.array->size(); - } - - case value_t::object: - { - // delegate call to object_t::size() - return m_value.object->size(); - } - - default: - { - // all other types have size 1 - return 1; - } - } - } - - /*! - @brief returns the maximum possible number of elements - - Returns the maximum number of elements a JSON value is able to hold due to - system or library implementation limitations, i.e. `std::distance(begin(), - end())` for the JSON value. 
- - @return The return value depends on the different types and is - defined as follows: - Value type | return value - ----------- | ------------- - null | `0` (same as `size()`) - boolean | `1` (same as `size()`) - string | `1` (same as `size()`) - number | `1` (same as `size()`) - object | result of function `object_t::max_size()` - array | result of function `array_t::max_size()` - - @complexity Constant, as long as @ref array_t and @ref object_t satisfy - the Container concept; that is, their `max_size()` functions have constant - complexity. - - @requirement This function helps `basic_json` satisfying the - [Container](http://en.cppreference.com/w/cpp/concept/Container) - requirements: - - The complexity is constant. - - Has the semantics of returning `b.size()` where `b` is the largest - possible JSON value. - - @liveexample{The following code calls `max_size()` on the different value - types. Note the output is implementation specific.,max_size} - - @sa @ref size() -- returns the number of elements - - @since version 1.0.0 - */ - size_type max_size() const noexcept - { - switch (m_type) - { - case value_t::array: - { - // delegate call to array_t::max_size() - return m_value.array->max_size(); - } - - case value_t::object: - { - // delegate call to object_t::max_size() - return m_value.object->max_size(); - } - - default: - { - // all other types have max_size() == size() - return size(); - } - } - } - - /// @} - - - /////////////// - // modifiers // - /////////////// - - /// @name modifiers - /// @{ - - /*! - @brief clears the contents - - Clears the content of a JSON value and resets it to the default value as - if @ref basic_json(value_t) would have been called: - - Value type | initial value - ----------- | ------------- - null | `null` - boolean | `false` - string | `""` - number | `0` - object | `{}` - array | `[]` - - @complexity Linear in the size of the JSON value. 
- - @liveexample{The example below shows the effect of `clear()` to different - JSON types.,clear} - - @since version 1.0.0 - */ - void clear() noexcept - { - switch (m_type) - { - case value_t::number_integer: - { - m_value.number_integer = 0; - break; - } - - case value_t::number_unsigned: - { - m_value.number_unsigned = 0; - break; - } - - case value_t::number_float: - { - m_value.number_float = 0.0; - break; - } - - case value_t::boolean: - { - m_value.boolean = false; - break; - } - - case value_t::string: - { - m_value.string->clear(); - break; - } - - case value_t::array: - { - m_value.array->clear(); - break; - } - - case value_t::object: - { - m_value.object->clear(); - break; - } - - default: - { - break; - } - } - } - - /*! - @brief add an object to an array - - Appends the given element @a val to the end of the JSON value. If the - function is called on a JSON null value, an empty array is created before - appending @a val. - - @param[in] val the value to add to the JSON array - - @throw type_error.308 when called on a type other than JSON array or - null; example: `"cannot use push_back() with number"` - - @complexity Amortized constant. - - @liveexample{The example shows how `push_back()` and `+=` can be used to - add elements to a JSON array. Note how the `null` value was silently - converted to a JSON array.,push_back} - - @since version 1.0.0 - */ - void push_back(basic_json&& val) - { - // push_back only works for null objects or arrays - if (not(is_null() or is_array())) - { - JSON_THROW(type_error(308, "cannot use push_back() with " + type_name())); - } - - // transform null object into an array - if (is_null()) - { - m_type = value_t::array; - m_value = value_t::array; - assert_invariant(); - } - - // add element to array (move semantics) - m_value.array->push_back(std::move(val)); - // invalidate object - val.m_type = value_t::null; - } - - /*! 
- @brief add an object to an array - @copydoc push_back(basic_json&&) - */ - reference operator+=(basic_json&& val) - { - push_back(std::move(val)); - return *this; - } - - /*! - @brief add an object to an array - @copydoc push_back(basic_json&&) - */ - void push_back(const basic_json& val) - { - // push_back only works for null objects or arrays - if (not(is_null() or is_array())) - { - JSON_THROW(type_error(308, "cannot use push_back() with " + type_name())); - } - - // transform null object into an array - if (is_null()) - { - m_type = value_t::array; - m_value = value_t::array; - assert_invariant(); - } - - // add element to array - m_value.array->push_back(val); - } - - /*! - @brief add an object to an array - @copydoc push_back(basic_json&&) - */ - reference operator+=(const basic_json& val) - { - push_back(val); - return *this; - } - - /*! - @brief add an object to an object - - Inserts the given element @a val to the JSON object. If the function is - called on a JSON null value, an empty object is created before inserting - @a val. - - @param[in] val the value to add to the JSON object - - @throw type_error.308 when called on a type other than JSON object or - null; example: `"cannot use push_back() with number"` - - @complexity Logarithmic in the size of the container, O(log(`size()`)). - - @liveexample{The example shows how `push_back()` and `+=` can be used to - add elements to a JSON object. 
Note how the `null` value was silently - converted to a JSON object.,push_back__object_t__value} - - @since version 1.0.0 - */ - void push_back(const typename object_t::value_type& val) - { - // push_back only works for null objects or objects - if (not(is_null() or is_object())) - { - JSON_THROW(type_error(308, "cannot use push_back() with " + type_name())); - } - - // transform null object into an object - if (is_null()) - { - m_type = value_t::object; - m_value = value_t::object; - assert_invariant(); - } - - // add element to array - m_value.object->insert(val); - } - - /*! - @brief add an object to an object - @copydoc push_back(const typename object_t::value_type&) - */ - reference operator+=(const typename object_t::value_type& val) - { - push_back(val); - return *this; - } - - /*! - @brief add an object to an object - - This function allows to use `push_back` with an initializer list. In case - - 1. the current value is an object, - 2. the initializer list @a init contains only two elements, and - 3. the first element of @a init is a string, - - @a init is converted into an object element and added using - @ref push_back(const typename object_t::value_type&). Otherwise, @a init - is converted to a JSON value and added using @ref push_back(basic_json&&). - - @param init an initializer list - - @complexity Linear in the size of the initializer list @a init. - - @note This function is required to resolve an ambiguous overload error, - because pairs like `{"key", "value"}` can be both interpreted as - `object_t::value_type` or `std::initializer_list`, see - https://github.com/nlohmann/json/issues/235 for more information. 
- - @liveexample{The example shows how initializer lists are treated as - objects when possible.,push_back__initializer_list} - */ - void push_back(std::initializer_list init) - { - if (is_object() and init.size() == 2 and init.begin()->is_string()) - { - const string_t key = *init.begin(); - push_back(typename object_t::value_type(key, *(init.begin() + 1))); - } - else - { - push_back(basic_json(init)); - } - } - - /*! - @brief add an object to an object - @copydoc push_back(std::initializer_list) - */ - reference operator+=(std::initializer_list init) - { - push_back(init); - return *this; - } - - /*! - @brief add an object to an array - - Creates a JSON value from the passed parameters @a args to the end of the - JSON value. If the function is called on a JSON null value, an empty array - is created before appending the value created from @a args. - - @param[in] args arguments to forward to a constructor of @ref basic_json - @tparam Args compatible types to create a @ref basic_json object - - @throw type_error.311 when called on a type other than JSON array or - null; example: `"cannot use emplace_back() with number"` - - @complexity Amortized constant. - - @liveexample{The example shows how `push_back()` can be used to add - elements to a JSON array. Note how the `null` value was silently converted - to a JSON array.,emplace_back} - - @since version 2.0.8 - */ - template - void emplace_back(Args&& ... args) - { - // emplace_back only works for null objects or arrays - if (not(is_null() or is_array())) - { - JSON_THROW(type_error(311, "cannot use emplace_back() with " + type_name())); - } - - // transform null object into an array - if (is_null()) - { - m_type = value_t::array; - m_value = value_t::array; - assert_invariant(); - } - - // add element to array (perfect forwarding) - m_value.array->emplace_back(std::forward(args)...); - } - - /*! 
- @brief add an object to an object if key does not exist - - Inserts a new element into a JSON object constructed in-place with the - given @a args if there is no element with the key in the container. If the - function is called on a JSON null value, an empty object is created before - appending the value created from @a args. - - @param[in] args arguments to forward to a constructor of @ref basic_json - @tparam Args compatible types to create a @ref basic_json object - - @return a pair consisting of an iterator to the inserted element, or the - already-existing element if no insertion happened, and a bool - denoting whether the insertion took place. - - @throw type_error.311 when called on a type other than JSON object or - null; example: `"cannot use emplace() with number"` - - @complexity Logarithmic in the size of the container, O(log(`size()`)). - - @liveexample{The example shows how `emplace()` can be used to add elements - to a JSON object. Note how the `null` value was silently converted to a - JSON object. Further note how no value is added if there was already one - value stored with the same key.,emplace} - - @since version 2.0.8 - */ - template - std::pair emplace(Args&& ... args) - { - // emplace only works for null objects or arrays - if (not(is_null() or is_object())) - { - JSON_THROW(type_error(311, "cannot use emplace() with " + type_name())); - } - - // transform null object into an object - if (is_null()) - { - m_type = value_t::object; - m_value = value_t::object; - assert_invariant(); - } - - // add element to array (perfect forwarding) - auto res = m_value.object->emplace(std::forward(args)...); - // create result iterator and set iterator to the result of emplace - auto it = begin(); - it.m_it.object_iterator = res.first; - - // return pair of iterator and boolean - return {it, res.second}; - } - - /*! - @brief inserts element - - Inserts element @a val before iterator @a pos. 
- - @param[in] pos iterator before which the content will be inserted; may be - the end() iterator - @param[in] val element to insert - @return iterator pointing to the inserted @a val. - - @throw type_error.309 if called on JSON values other than arrays; - example: `"cannot use insert() with string"` - @throw invalid_iterator.202 if @a pos is not an iterator of *this; - example: `"iterator does not fit current value"` - - @complexity Constant plus linear in the distance between @a pos and end of - the container. - - @liveexample{The example shows how `insert()` is used.,insert} - - @since version 1.0.0 - */ - iterator insert(const_iterator pos, const basic_json& val) - { - // insert only works for arrays - if (is_array()) - { - // check if iterator pos fits to this JSON value - if (pos.m_object != this) - { - JSON_THROW(invalid_iterator(202, "iterator does not fit current value")); - } - - // insert to array and return iterator - iterator result(this); - result.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, val); - return result; - } - - JSON_THROW(type_error(309, "cannot use insert() with " + type_name())); - } - - /*! - @brief inserts element - @copydoc insert(const_iterator, const basic_json&) - */ - iterator insert(const_iterator pos, basic_json&& val) - { - return insert(pos, val); - } - - /*! - @brief inserts elements - - Inserts @a cnt copies of @a val before iterator @a pos. 
- - @param[in] pos iterator before which the content will be inserted; may be - the end() iterator - @param[in] cnt number of copies of @a val to insert - @param[in] val element to insert - @return iterator pointing to the first element inserted, or @a pos if - `cnt==0` - - @throw type_error.309 if called on JSON values other than arrays; example: - `"cannot use insert() with string"` - @throw invalid_iterator.202 if @a pos is not an iterator of *this; - example: `"iterator does not fit current value"` - - @complexity Linear in @a cnt plus linear in the distance between @a pos - and end of the container. - - @liveexample{The example shows how `insert()` is used.,insert__count} - - @since version 1.0.0 - */ - iterator insert(const_iterator pos, size_type cnt, const basic_json& val) - { - // insert only works for arrays - if (is_array()) - { - // check if iterator pos fits to this JSON value - if (pos.m_object != this) - { - JSON_THROW(invalid_iterator(202, "iterator does not fit current value")); - } - - // insert to array and return iterator - iterator result(this); - result.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, cnt, val); - return result; - } - - JSON_THROW(type_error(309, "cannot use insert() with " + type_name())); - } - - /*! - @brief inserts elements - - Inserts elements from range `[first, last)` before iterator @a pos. 
- - @param[in] pos iterator before which the content will be inserted; may be - the end() iterator - @param[in] first begin of the range of elements to insert - @param[in] last end of the range of elements to insert - - @throw type_error.309 if called on JSON values other than arrays; example: - `"cannot use insert() with string"` - @throw invalid_iterator.202 if @a pos is not an iterator of *this; - example: `"iterator does not fit current value"` - @throw invalid_iterator.210 if @a first and @a last do not belong to the - same JSON value; example: `"iterators do not fit"` - @throw invalid_iterator.211 if @a first or @a last are iterators into - container for which insert is called; example: `"passed iterators may not - belong to container"` - - @return iterator pointing to the first element inserted, or @a pos if - `first==last` - - @complexity Linear in `std::distance(first, last)` plus linear in the - distance between @a pos and end of the container. - - @liveexample{The example shows how `insert()` is used.,insert__range} - - @since version 1.0.0 - */ - iterator insert(const_iterator pos, const_iterator first, const_iterator last) - { - // insert only works for arrays - if (not is_array()) - { - JSON_THROW(type_error(309, "cannot use insert() with " + type_name())); - } - - // check if iterator pos fits to this JSON value - if (pos.m_object != this) - { - JSON_THROW(invalid_iterator(202, "iterator does not fit current value")); - } - - // check if range iterators belong to the same JSON object - if (first.m_object != last.m_object) - { - JSON_THROW(invalid_iterator(210, "iterators do not fit")); - } - - if (first.m_object == this or last.m_object == this) - { - JSON_THROW(invalid_iterator(211, "passed iterators may not belong to container")); - } - - // insert to array and return iterator - iterator result(this); - result.m_it.array_iterator = m_value.array->insert( - pos.m_it.array_iterator, - first.m_it.array_iterator, - last.m_it.array_iterator); - return 
result; - } - - /*! - @brief inserts elements - - Inserts elements from initializer list @a ilist before iterator @a pos. - - @param[in] pos iterator before which the content will be inserted; may be - the end() iterator - @param[in] ilist initializer list to insert the values from - - @throw type_error.309 if called on JSON values other than arrays; example: - `"cannot use insert() with string"` - @throw invalid_iterator.202 if @a pos is not an iterator of *this; - example: `"iterator does not fit current value"` - - @return iterator pointing to the first element inserted, or @a pos if - `ilist` is empty - - @complexity Linear in `ilist.size()` plus linear in the distance between - @a pos and end of the container. - - @liveexample{The example shows how `insert()` is used.,insert__ilist} - - @since version 1.0.0 - */ - iterator insert(const_iterator pos, std::initializer_list ilist) - { - // insert only works for arrays - if (not is_array()) - { - JSON_THROW(type_error(309, "cannot use insert() with " + type_name())); - } - - // check if iterator pos fits to this JSON value - if (pos.m_object != this) - { - JSON_THROW(invalid_iterator(202, "iterator does not fit current value")); - } - - // insert to array and return iterator - iterator result(this); - result.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, ilist); - return result; - } - - /*! - @brief exchanges the values - - Exchanges the contents of the JSON value with those of @a other. Does not - invoke any move, copy, or swap operations on individual elements. All - iterators and references remain valid. The past-the-end iterator is - invalidated. - - @param[in,out] other JSON value to exchange the contents with - - @complexity Constant. 
- - @liveexample{The example below shows how JSON values can be swapped with - `swap()`.,swap__reference} - - @since version 1.0.0 - */ - void swap(reference other) noexcept ( - std::is_nothrow_move_constructible::value and - std::is_nothrow_move_assignable::value and - std::is_nothrow_move_constructible::value and - std::is_nothrow_move_assignable::value - ) - { - std::swap(m_type, other.m_type); - std::swap(m_value, other.m_value); - assert_invariant(); - } - - /*! - @brief exchanges the values - - Exchanges the contents of a JSON array with those of @a other. Does not - invoke any move, copy, or swap operations on individual elements. All - iterators and references remain valid. The past-the-end iterator is - invalidated. - - @param[in,out] other array to exchange the contents with - - @throw type_error.310 when JSON value is not an array; example: `"cannot - use swap() with string"` - - @complexity Constant. - - @liveexample{The example below shows how arrays can be swapped with - `swap()`.,swap__array_t} - - @since version 1.0.0 - */ - void swap(array_t& other) - { - // swap only works for arrays - if (is_array()) - { - std::swap(*(m_value.array), other); - } - else - { - JSON_THROW(type_error(310, "cannot use swap() with " + type_name())); - } - } - - /*! - @brief exchanges the values - - Exchanges the contents of a JSON object with those of @a other. Does not - invoke any move, copy, or swap operations on individual elements. All - iterators and references remain valid. The past-the-end iterator is - invalidated. - - @param[in,out] other object to exchange the contents with - - @throw type_error.310 when JSON value is not an object; example: - `"cannot use swap() with string"` - - @complexity Constant. 
- - @liveexample{The example below shows how objects can be swapped with - `swap()`.,swap__object_t} - - @since version 1.0.0 - */ - void swap(object_t& other) - { - // swap only works for objects - if (is_object()) - { - std::swap(*(m_value.object), other); - } - else - { - JSON_THROW(type_error(310, "cannot use swap() with " + type_name())); - } - } - - /*! - @brief exchanges the values - - Exchanges the contents of a JSON string with those of @a other. Does not - invoke any move, copy, or swap operations on individual elements. All - iterators and references remain valid. The past-the-end iterator is - invalidated. - - @param[in,out] other string to exchange the contents with - - @throw type_error.310 when JSON value is not a string; example: `"cannot - use swap() with boolean"` - - @complexity Constant. - - @liveexample{The example below shows how strings can be swapped with - `swap()`.,swap__string_t} - - @since version 1.0.0 - */ - void swap(string_t& other) - { - // swap only works for strings - if (is_string()) - { - std::swap(*(m_value.string), other); - } - else - { - JSON_THROW(type_error(310, "cannot use swap() with " + type_name())); - } - } - - /// @} - - public: - ////////////////////////////////////////// - // lexicographical comparison operators // - ////////////////////////////////////////// - - /// @name lexicographical comparison operators - /// @{ - - /*! - @brief comparison: equal - - Compares two JSON values for equality according to the following rules: - - Two JSON values are equal if (1) they are from the same type and (2) - their stored values are the same. - - Integer and floating-point numbers are automatically converted before - comparison. Floating-point numbers are compared indirectly: two - floating-point numbers `f1` and `f2` are considered equal if neither - `f1 > f2` nor `f2 > f1` holds. - - Two JSON null values are equal. 
- - @param[in] lhs first JSON value to consider - @param[in] rhs second JSON value to consider - @return whether the values @a lhs and @a rhs are equal - - @complexity Linear. - - @liveexample{The example demonstrates comparing several JSON - types.,operator__equal} - - @since version 1.0.0 - */ - friend bool operator==(const_reference lhs, const_reference rhs) noexcept - { - const auto lhs_type = lhs.type(); - const auto rhs_type = rhs.type(); - - if (lhs_type == rhs_type) - { - switch (lhs_type) - { - case value_t::array: - { - return *lhs.m_value.array == *rhs.m_value.array; - } - case value_t::object: - { - return *lhs.m_value.object == *rhs.m_value.object; - } - case value_t::null: - { - return true; - } - case value_t::string: - { - return *lhs.m_value.string == *rhs.m_value.string; - } - case value_t::boolean: - { - return lhs.m_value.boolean == rhs.m_value.boolean; - } - case value_t::number_integer: - { - return lhs.m_value.number_integer == rhs.m_value.number_integer; - } - case value_t::number_unsigned: - { - return lhs.m_value.number_unsigned == rhs.m_value.number_unsigned; - } - case value_t::number_float: - { - return lhs.m_value.number_float == rhs.m_value.number_float; - } - default: - { - return false; - } - } - } - else if (lhs_type == value_t::number_integer and rhs_type == value_t::number_float) - { - return static_cast(lhs.m_value.number_integer) == rhs.m_value.number_float; - } - else if (lhs_type == value_t::number_float and rhs_type == value_t::number_integer) - { - return lhs.m_value.number_float == static_cast(rhs.m_value.number_integer); - } - else if (lhs_type == value_t::number_unsigned and rhs_type == value_t::number_float) - { - return static_cast(lhs.m_value.number_unsigned) == rhs.m_value.number_float; - } - else if (lhs_type == value_t::number_float and rhs_type == value_t::number_unsigned) - { - return lhs.m_value.number_float == static_cast(rhs.m_value.number_unsigned); - } - else if (lhs_type == value_t::number_unsigned and 
rhs_type == value_t::number_integer) - { - return static_cast(lhs.m_value.number_unsigned) == rhs.m_value.number_integer; - } - else if (lhs_type == value_t::number_integer and rhs_type == value_t::number_unsigned) - { - return lhs.m_value.number_integer == static_cast(rhs.m_value.number_unsigned); - } - - return false; - } - - /*! - @brief comparison: equal - @copydoc operator==(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator==(const_reference lhs, const ScalarType rhs) noexcept - { - return (lhs == basic_json(rhs)); - } - - /*! - @brief comparison: equal - @copydoc operator==(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator==(const ScalarType lhs, const_reference rhs) noexcept - { - return (basic_json(lhs) == rhs); - } - - /*! - @brief comparison: not equal - - Compares two JSON values for inequality by calculating `not (lhs == rhs)`. - - @param[in] lhs first JSON value to consider - @param[in] rhs second JSON value to consider - @return whether the values @a lhs and @a rhs are not equal - - @complexity Linear. - - @liveexample{The example demonstrates comparing several JSON - types.,operator__notequal} - - @since version 1.0.0 - */ - friend bool operator!=(const_reference lhs, const_reference rhs) noexcept - { - return not (lhs == rhs); - } - - /*! - @brief comparison: not equal - @copydoc operator!=(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator!=(const_reference lhs, const ScalarType rhs) noexcept - { - return (lhs != basic_json(rhs)); - } - - /*! - @brief comparison: not equal - @copydoc operator!=(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator!=(const ScalarType lhs, const_reference rhs) noexcept - { - return (basic_json(lhs) != rhs); - } - - /*! 
- @brief comparison: less than - - Compares whether one JSON value @a lhs is less than another JSON value @a - rhs according to the following rules: - - If @a lhs and @a rhs have the same type, the values are compared using - the default `<` operator. - - Integer and floating-point numbers are automatically converted before - comparison - - In case @a lhs and @a rhs have different types, the values are ignored - and the order of the types is considered, see - @ref operator<(const value_t, const value_t). - - @param[in] lhs first JSON value to consider - @param[in] rhs second JSON value to consider - @return whether @a lhs is less than @a rhs - - @complexity Linear. - - @liveexample{The example demonstrates comparing several JSON - types.,operator__less} - - @since version 1.0.0 - */ - friend bool operator<(const_reference lhs, const_reference rhs) noexcept - { - const auto lhs_type = lhs.type(); - const auto rhs_type = rhs.type(); - - if (lhs_type == rhs_type) - { - switch (lhs_type) - { - case value_t::array: - { - return *lhs.m_value.array < *rhs.m_value.array; - } - case value_t::object: - { - return *lhs.m_value.object < *rhs.m_value.object; - } - case value_t::null: - { - return false; - } - case value_t::string: - { - return *lhs.m_value.string < *rhs.m_value.string; - } - case value_t::boolean: - { - return lhs.m_value.boolean < rhs.m_value.boolean; - } - case value_t::number_integer: - { - return lhs.m_value.number_integer < rhs.m_value.number_integer; - } - case value_t::number_unsigned: - { - return lhs.m_value.number_unsigned < rhs.m_value.number_unsigned; - } - case value_t::number_float: - { - return lhs.m_value.number_float < rhs.m_value.number_float; - } - default: - { - return false; - } - } - } - else if (lhs_type == value_t::number_integer and rhs_type == value_t::number_float) - { - return static_cast(lhs.m_value.number_integer) < rhs.m_value.number_float; - } - else if (lhs_type == value_t::number_float and rhs_type == value_t::number_integer) - 
{ - return lhs.m_value.number_float < static_cast(rhs.m_value.number_integer); - } - else if (lhs_type == value_t::number_unsigned and rhs_type == value_t::number_float) - { - return static_cast(lhs.m_value.number_unsigned) < rhs.m_value.number_float; - } - else if (lhs_type == value_t::number_float and rhs_type == value_t::number_unsigned) - { - return lhs.m_value.number_float < static_cast(rhs.m_value.number_unsigned); - } - else if (lhs_type == value_t::number_integer and rhs_type == value_t::number_unsigned) - { - return lhs.m_value.number_integer < static_cast(rhs.m_value.number_unsigned); - } - else if (lhs_type == value_t::number_unsigned and rhs_type == value_t::number_integer) - { - return static_cast(lhs.m_value.number_unsigned) < rhs.m_value.number_integer; - } - - // We only reach this line if we cannot compare values. In that case, - // we compare types. Note we have to call the operator explicitly, - // because MSVC has problems otherwise. - return operator<(lhs_type, rhs_type); - } - - /*! - @brief comparison: less than or equal - - Compares whether one JSON value @a lhs is less than or equal to another - JSON value by calculating `not (rhs < lhs)`. - - @param[in] lhs first JSON value to consider - @param[in] rhs second JSON value to consider - @return whether @a lhs is less than or equal to @a rhs - - @complexity Linear. - - @liveexample{The example demonstrates comparing several JSON - types.,operator__greater} - - @since version 1.0.0 - */ - friend bool operator<=(const_reference lhs, const_reference rhs) noexcept - { - return not (rhs < lhs); - } - - /*! - @brief comparison: greater than - - Compares whether one JSON value @a lhs is greater than another - JSON value by calculating `not (lhs <= rhs)`. - - @param[in] lhs first JSON value to consider - @param[in] rhs second JSON value to consider - @return whether @a lhs is greater than to @a rhs - - @complexity Linear. 
- - @liveexample{The example demonstrates comparing several JSON - types.,operator__lessequal} - - @since version 1.0.0 - */ - friend bool operator>(const_reference lhs, const_reference rhs) noexcept - { - return not (lhs <= rhs); - } - - /*! - @brief comparison: greater than or equal - - Compares whether one JSON value @a lhs is greater than or equal to another - JSON value by calculating `not (lhs < rhs)`. - - @param[in] lhs first JSON value to consider - @param[in] rhs second JSON value to consider - @return whether @a lhs is greater than or equal to @a rhs - - @complexity Linear. - - @liveexample{The example demonstrates comparing several JSON - types.,operator__greaterequal} - - @since version 1.0.0 - */ - friend bool operator>=(const_reference lhs, const_reference rhs) noexcept - { - return not (lhs < rhs); - } - - /// @} - - - /////////////////// - // serialization // - /////////////////// - - /// @name serialization - /// @{ - - private: - /*! - @brief wrapper around the serialization functions - */ - class serializer - { - private: - serializer(const serializer&) = delete; - serializer& operator=(const serializer&) = delete; - - public: - /*! - @param[in] s output stream to serialize to - */ - serializer(std::ostream& s) - : o(s), loc(std::localeconv()), - thousands_sep(!loc->thousands_sep ? '\0' : loc->thousands_sep[0]), - decimal_point(!loc->decimal_point ? '\0' : loc->decimal_point[0]) - {} - - /*! - @brief internal implementation of the serialization function - - This function is called by the public member function dump and - organizes the serialization internally. The indentation level is - propagated as additional parameter. In case of arrays and objects, the - function is called recursively. 
- - - strings and object keys are escaped using `escape_string()` - - integer numbers are converted implicitly via `operator<<` - - floating-point numbers are converted to a string using `"%g"` format - - @param[in] val value to serialize - @param[in] pretty_print whether the output shall be pretty-printed - @param[in] indent_step the indent level - @param[in] current_indent the current indent level (only used internally) - */ - void dump(const basic_json& val, - const bool pretty_print, - const unsigned int indent_step, - const unsigned int current_indent = 0) - { - switch (val.m_type) - { - case value_t::object: - { - if (val.m_value.object->empty()) - { - o.write("{}", 2); - return; - } - - if (pretty_print) - { - o.write("{\n", 2); - - // variable to hold indentation for recursive calls - const auto new_indent = current_indent + indent_step; - if (indent_string.size() < new_indent) - { - indent_string.resize(new_indent, ' '); - } - - // first n-1 elements - auto i = val.m_value.object->cbegin(); - for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) - { - o.write(indent_string.c_str(), static_cast(new_indent)); - o.put('\"'); - dump_escaped(i->first); - o.write("\": ", 3); - dump(i->second, true, indent_step, new_indent); - o.write(",\n", 2); - } - - // last element - assert(i != val.m_value.object->cend()); - o.write(indent_string.c_str(), static_cast(new_indent)); - o.put('\"'); - dump_escaped(i->first); - o.write("\": ", 3); - dump(i->second, true, indent_step, new_indent); - - o.put('\n'); - o.write(indent_string.c_str(), static_cast(current_indent)); - o.put('}'); - } - else - { - o.put('{'); - - // first n-1 elements - auto i = val.m_value.object->cbegin(); - for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) - { - o.put('\"'); - dump_escaped(i->first); - o.write("\":", 2); - dump(i->second, false, indent_step, current_indent); - o.put(','); - } - - // last element - assert(i != val.m_value.object->cend()); - 
o.put('\"'); - dump_escaped(i->first); - o.write("\":", 2); - dump(i->second, false, indent_step, current_indent); - - o.put('}'); - } - - return; - } - - case value_t::array: - { - if (val.m_value.array->empty()) - { - o.write("[]", 2); - return; - } - - if (pretty_print) - { - o.write("[\n", 2); - - // variable to hold indentation for recursive calls - const auto new_indent = current_indent + indent_step; - if (indent_string.size() < new_indent) - { - indent_string.resize(new_indent, ' '); - } - - // first n-1 elements - for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i) - { - o.write(indent_string.c_str(), static_cast(new_indent)); - dump(*i, true, indent_step, new_indent); - o.write(",\n", 2); - } - - // last element - assert(not val.m_value.array->empty()); - o.write(indent_string.c_str(), static_cast(new_indent)); - dump(val.m_value.array->back(), true, indent_step, new_indent); - - o.put('\n'); - o.write(indent_string.c_str(), static_cast(current_indent)); - o.put(']'); - } - else - { - o.put('['); - - // first n-1 elements - for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i) - { - dump(*i, false, indent_step, current_indent); - o.put(','); - } - - // last element - assert(not val.m_value.array->empty()); - dump(val.m_value.array->back(), false, indent_step, current_indent); - - o.put(']'); - } - - return; - } - - case value_t::string: - { - o.put('\"'); - dump_escaped(*val.m_value.string); - o.put('\"'); - return; - } - - case value_t::boolean: - { - if (val.m_value.boolean) - { - o.write("true", 4); - } - else - { - o.write("false", 5); - } - return; - } - - case value_t::number_integer: - { - dump_integer(val.m_value.number_integer); - return; - } - - case value_t::number_unsigned: - { - dump_integer(val.m_value.number_unsigned); - return; - } - - case value_t::number_float: - { - dump_float(val.m_value.number_float); - return; - } - - case value_t::discarded: - { - o.write("", 11); - 
return; - } - - case value_t::null: - { - o.write("null", 4); - return; - } - } - } - - private: - /*! - @brief calculates the extra space to escape a JSON string - - @param[in] s the string to escape - @return the number of characters required to escape string @a s - - @complexity Linear in the length of string @a s. - */ - static std::size_t extra_space(const string_t& s) noexcept - { - return std::accumulate(s.begin(), s.end(), size_t{}, - [](size_t res, typename string_t::value_type c) - { - switch (c) - { - case '"': - case '\\': - case '\b': - case '\f': - case '\n': - case '\r': - case '\t': - { - // from c (1 byte) to \x (2 bytes) - return res + 1; - } - - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x0b: - case 0x0e: - case 0x0f: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - case 0x18: - case 0x19: - case 0x1a: - case 0x1b: - case 0x1c: - case 0x1d: - case 0x1e: - case 0x1f: - { - // from c (1 byte) to \uxxxx (6 bytes) - return res + 5; - } - - default: - { - return res; - } - } - }); - } - - /*! - @brief dump escaped string - - Escape a string by replacing certain special characters by a sequence - of an escape character (backslash) and another character and other - control characters by a sequence of "\u" followed by a four-digit hex - representation. The escaped string is written to output stream @a o. - - @param[in] s the string to escape - - @complexity Linear in the length of string @a s. 
- */ - void dump_escaped(const string_t& s) const - { - const auto space = extra_space(s); - if (space == 0) - { - o.write(s.c_str(), static_cast(s.size())); - return; - } - - // create a result string of necessary size - string_t result(s.size() + space, '\\'); - std::size_t pos = 0; - - for (const auto& c : s) - { - switch (c) - { - // quotation mark (0x22) - case '"': - { - result[pos + 1] = '"'; - pos += 2; - break; - } - - // reverse solidus (0x5c) - case '\\': - { - // nothing to change - pos += 2; - break; - } - - // backspace (0x08) - case '\b': - { - result[pos + 1] = 'b'; - pos += 2; - break; - } - - // formfeed (0x0c) - case '\f': - { - result[pos + 1] = 'f'; - pos += 2; - break; - } - - // newline (0x0a) - case '\n': - { - result[pos + 1] = 'n'; - pos += 2; - break; - } - - // carriage return (0x0d) - case '\r': - { - result[pos + 1] = 'r'; - pos += 2; - break; - } - - // horizontal tab (0x09) - case '\t': - { - result[pos + 1] = 't'; - pos += 2; - break; - } - - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x0b: - case 0x0e: - case 0x0f: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - case 0x18: - case 0x19: - case 0x1a: - case 0x1b: - case 0x1c: - case 0x1d: - case 0x1e: - case 0x1f: - { - // convert a number 0..15 to its hex representation - // (0..f) - static const char hexify[16] = - { - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' - }; - - // print character c as \uxxxx - for (const char m : - { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f] - }) - { - result[++pos] = m; - } - - ++pos; - break; - } - - default: - { - // all other characters are added as-is - result[pos++] = c; - break; - } - } - } - - assert(pos == s.size() + space); - o.write(result.c_str(), static_cast(result.size())); - } - - /*! - @brief dump an integer - - Dump a given integer to output stream @a o. 
Works internally with - @a number_buffer. - - @param[in] x integer number (signed or unsigned) to dump - @tparam NumberType either @a number_integer_t or @a number_unsigned_t - */ - template::value or - std::is_same::value, int> = 0> - void dump_integer(NumberType x) - { - // special case for "0" - if (x == 0) - { - o.put('0'); - return; - } - - const bool is_negative = x < 0; - size_t i = 0; - - // spare 1 byte for '\0' - while (x != 0 and i < number_buffer.size() - 1) - { - const auto digit = std::labs(static_cast(x % 10)); - number_buffer[i++] = static_cast('0' + digit); - x /= 10; - } - - // make sure the number has been processed completely - assert(x == 0); - - if (is_negative) - { - // make sure there is capacity for the '-' - assert(i < number_buffer.size() - 2); - number_buffer[i++] = '-'; - } - - std::reverse(number_buffer.begin(), number_buffer.begin() + i); - o.write(number_buffer.data(), static_cast(i)); - } - - /*! - @brief dump a floating-point number - - Dump a given floating-point number to output stream @a o. Works - internally with @a number_buffer. 
- - @param[in] x floating-point number to dump - */ - void dump_float(number_float_t x) - { - // NaN / inf - if (not std::isfinite(x) or std::isnan(x)) - { - o.write("null", 4); - return; - } - - // special case for 0.0 and -0.0 - if (x == 0) - { - if (std::signbit(x)) - { - o.write("-0.0", 4); - } - else - { - o.write("0.0", 3); - } - return; - } - - // get number of digits for a text -> float -> text round-trip - static constexpr auto d = std::numeric_limits::digits10; - - // the actual conversion - std::ptrdiff_t len = snprintf(number_buffer.data(), number_buffer.size(), - "%.*g", d, x); - - // negative value indicates an error - assert(len > 0); - // check if buffer was large enough - assert(static_cast(len) < number_buffer.size()); - - // erase thousands separator - if (thousands_sep != '\0') - { - const auto end = std::remove(number_buffer.begin(), - number_buffer.begin() + len, - thousands_sep); - std::fill(end, number_buffer.end(), '\0'); - assert((end - number_buffer.begin()) <= len); - len = (end - number_buffer.begin()); - } - - // convert decimal point to '.' - if (decimal_point != '\0' and decimal_point != '.') - { - for (auto& c : number_buffer) - { - if (c == decimal_point) - { - c = '.'; - break; - } - } - } - - o.write(number_buffer.data(), static_cast(len)); - - // determine if need to append ".0" - const bool value_is_int_like = std::none_of(number_buffer.begin(), - number_buffer.begin() + len + 1, - [](char c) - { - return c == '.' 
or c == 'e'; - }); - - if (value_is_int_like) - { - o.write(".0", 2); - } - } - - private: - /// the output of the serializer - std::ostream& o; - - /// a (hopefully) large enough character buffer - std::array number_buffer{{}}; - - /// the locale - const std::lconv* loc = nullptr; - /// the locale's thousand separator character - const char thousands_sep = '\0'; - /// the locale's decimal point character - const char decimal_point = '\0'; - - /// the indentation string - string_t indent_string = string_t(512, ' '); - }; - - public: - /*! - @brief serialize to stream - - Serialize the given JSON value @a j to the output stream @a o. The JSON - value will be serialized using the @ref dump member function. The - indentation of the output can be controlled with the member variable - `width` of the output stream @a o. For instance, using the manipulator - `std::setw(4)` on @a o sets the indentation level to `4` and the - serialization result is the same as calling `dump(4)`. - - @param[in,out] o stream to serialize to - @param[in] j JSON value to serialize - - @return the stream @a o - - @complexity Linear. - - @liveexample{The example below shows the serialization with different - parameters to `width` to adjust the indentation level.,operator_serialize} - - @since version 1.0.0 - */ - friend std::ostream& operator<<(std::ostream& o, const basic_json& j) - { - // read width member and use it as indentation parameter if nonzero - const bool pretty_print = (o.width() > 0); - const auto indentation = (pretty_print ? o.width() : 0); - - // reset width to 0 for subsequent calls to this stream - o.width(0); - - // do the actual serialization - serializer s(o); - s.dump(j, pretty_print, static_cast(indentation)); - return o; - } - - /*! 
- @brief serialize to stream - @copydoc operator<<(std::ostream&, const basic_json&) - */ - friend std::ostream& operator>>(const basic_json& j, std::ostream& o) - { - return o << j; - } - - /// @} - - - ///////////////////// - // deserialization // - ///////////////////// - - /// @name deserialization - /// @{ - - /*! - @brief deserialize from an array - - This function reads from an array of 1-byte values. - - @pre Each element of the container has a size of 1 byte. Violating this - precondition yields undefined behavior. **This precondition is enforced - with a static assertion.** - - @param[in] array array to read from - @param[in] cb a parser callback function of type @ref parser_callback_t - which is used to control the deserialization by filtering unwanted values - (optional) - - @return result of the deserialization - - @throw parse_error.101 if a parse error occurs; example: `""unexpected end - of input; expected string literal""` - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - - @complexity Linear in the length of the input. The parser is a predictive - LL(1) parser. The complexity can be higher if the parser callback function - @a cb has a super-linear complexity. - - @note A UTF-8 byte order mark is silently ignored. - - @liveexample{The example below demonstrates the `parse()` function reading - from an array.,parse__array__parser_callback_t} - - @since version 2.0.3 - */ - template - static basic_json parse(T (&array)[N], - const parser_callback_t cb = nullptr) - { - // delegate the call to the iterator-range parse overload - return parse(std::begin(array), std::end(array), cb); - } - - /*! 
- @brief deserialize from string literal - - @tparam CharT character/literal type with size of 1 byte - @param[in] s string literal to read a serialized JSON value from - @param[in] cb a parser callback function of type @ref parser_callback_t - which is used to control the deserialization by filtering unwanted values - (optional) - - @return result of the deserialization - - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - - @complexity Linear in the length of the input. The parser is a predictive - LL(1) parser. The complexity can be higher if the parser callback function - @a cb has a super-linear complexity. - - @note A UTF-8 byte order mark is silently ignored. - @note String containers like `std::string` or @ref string_t can be parsed - with @ref parse(const ContiguousContainer&, const parser_callback_t) - - @liveexample{The example below demonstrates the `parse()` function with - and without callback function.,parse__string__parser_callback_t} - - @sa @ref parse(std::istream&, const parser_callback_t) for a version that - reads from an input stream - - @since version 1.0.0 (originally for @ref string_t) - */ - template::value and - std::is_integral::type>::value and - sizeof(typename std::remove_pointer::type) == 1, int>::type = 0> - static basic_json parse(const CharT s, - const parser_callback_t cb = nullptr) - { - return parser(reinterpret_cast(s), cb).parse(); - } - - /*! 
- @brief deserialize from stream - - @param[in,out] i stream to read a serialized JSON value from - @param[in] cb a parser callback function of type @ref parser_callback_t - which is used to control the deserialization by filtering unwanted values - (optional) - - @return result of the deserialization - - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - @throw parse_error.111 if input stream is in a bad state - - @complexity Linear in the length of the input. The parser is a predictive - LL(1) parser. The complexity can be higher if the parser callback function - @a cb has a super-linear complexity. - - @note A UTF-8 byte order mark is silently ignored. - - @liveexample{The example below demonstrates the `parse()` function with - and without callback function.,parse__istream__parser_callback_t} - - @sa @ref parse(const CharT, const parser_callback_t) for a version - that reads from a string - - @since version 1.0.0 - */ - static basic_json parse(std::istream& i, - const parser_callback_t cb = nullptr) - { - return parser(i, cb).parse(); - } - - /*! - @copydoc parse(std::istream&, const parser_callback_t) - */ - static basic_json parse(std::istream&& i, - const parser_callback_t cb = nullptr) - { - return parser(i, cb).parse(); - } - - /*! - @brief deserialize from an iterator range with contiguous storage - - This function reads from an iterator range of a container with contiguous - storage of 1-byte values. Compatible container types include - `std::vector`, `std::string`, `std::array`, `std::valarray`, and - `std::initializer_list`. Furthermore, C-style arrays can be used with - `std::begin()`/`std::end()`. User-defined containers can be used as long - as they implement random-access iterators and a contiguous storage. - - @pre The iterator range is contiguous. Violating this precondition yields - undefined behavior. 
**This precondition is enforced with an assertion.** - @pre Each element in the range has a size of 1 byte. Violating this - precondition yields undefined behavior. **This precondition is enforced - with a static assertion.** - - @warning There is no way to enforce all preconditions at compile-time. If - the function is called with noncompliant iterators and with - assertions switched off, the behavior is undefined and will most - likely yield segmentation violation. - - @tparam IteratorType iterator of container with contiguous storage - @param[in] first begin of the range to parse (included) - @param[in] last end of the range to parse (excluded) - @param[in] cb a parser callback function of type @ref parser_callback_t - which is used to control the deserialization by filtering unwanted values - (optional) - - @return result of the deserialization - - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - - @complexity Linear in the length of the input. The parser is a predictive - LL(1) parser. The complexity can be higher if the parser callback function - @a cb has a super-linear complexity. - - @note A UTF-8 byte order mark is silently ignored. 
- - @liveexample{The example below demonstrates the `parse()` function reading - from an iterator range.,parse__iteratortype__parser_callback_t} - - @since version 2.0.3 - */ - template::iterator_category>::value, int>::type = 0> - static basic_json parse(IteratorType first, IteratorType last, - const parser_callback_t cb = nullptr) - { - // assertion to check that the iterator range is indeed contiguous, - // see http://stackoverflow.com/a/35008842/266378 for more discussion - assert(std::accumulate(first, last, std::pair(true, 0), - [&first](std::pair res, decltype(*first) val) - { - res.first &= (val == *(std::next(std::addressof(*first), res.second++))); - return res; - }).first); - - // assertion to check that each element is 1 byte long - static_assert(sizeof(typename std::iterator_traits::value_type) == 1, - "each element in the iterator range must have the size of 1 byte"); - - // if iterator range is empty, create a parser with an empty string - // to generate "unexpected EOF" error message - if (std::distance(first, last) <= 0) - { - return parser("").parse(); - } - - return parser(first, last, cb).parse(); - } - - /*! - @brief deserialize from a container with contiguous storage - - This function reads from a container with contiguous storage of 1-byte - values. Compatible container types include `std::vector`, `std::string`, - `std::array`, and `std::initializer_list`. User-defined containers can be - used as long as they implement random-access iterators and a contiguous - storage. - - @pre The container storage is contiguous. Violating this precondition - yields undefined behavior. **This precondition is enforced with an - assertion.** - @pre Each element of the container has a size of 1 byte. Violating this - precondition yields undefined behavior. **This precondition is enforced - with a static assertion.** - - @warning There is no way to enforce all preconditions at compile-time. 
If - the function is called with a noncompliant container and with - assertions switched off, the behavior is undefined and will most - likely yield segmentation violation. - - @tparam ContiguousContainer container type with contiguous storage - @param[in] c container to read from - @param[in] cb a parser callback function of type @ref parser_callback_t - which is used to control the deserialization by filtering unwanted values - (optional) - - @return result of the deserialization - - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - - @complexity Linear in the length of the input. The parser is a predictive - LL(1) parser. The complexity can be higher if the parser callback function - @a cb has a super-linear complexity. - - @note A UTF-8 byte order mark is silently ignored. - - @liveexample{The example below demonstrates the `parse()` function reading - from a contiguous container.,parse__contiguouscontainer__parser_callback_t} - - @since version 2.0.3 - */ - template::value and - std::is_base_of< - std::random_access_iterator_tag, - typename std::iterator_traits()))>::iterator_category>::value - , int>::type = 0> - static basic_json parse(const ContiguousContainer& c, - const parser_callback_t cb = nullptr) - { - // delegate the call to the iterator-range parse overload - return parse(std::begin(c), std::end(c), cb); - } - - /*! - @brief deserialize from stream - - Deserializes an input stream to a JSON value. - - @param[in,out] i input stream to read a serialized JSON value from - @param[in,out] j JSON value to write the deserialized input to - - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - @throw parse_error.111 if input stream is in a bad state - - @complexity Linear in the length of the input. 
The parser is a predictive - LL(1) parser. - - @note A UTF-8 byte order mark is silently ignored. - - @liveexample{The example below shows how a JSON value is constructed by - reading a serialization from a stream.,operator_deserialize} - - @sa parse(std::istream&, const parser_callback_t) for a variant with a - parser callback function to filter values while parsing - - @since version 1.0.0 - */ - friend std::istream& operator<<(basic_json& j, std::istream& i) - { - j = parser(i).parse(); - return i; - } - - /*! - @brief deserialize from stream - @copydoc operator<<(basic_json&, std::istream&) - */ - friend std::istream& operator>>(std::istream& i, basic_json& j) - { - j = parser(i).parse(); - return i; - } - - /// @} - - ////////////////////////////////////////// - // binary serialization/deserialization // - ////////////////////////////////////////// - - /// @name binary serialization/deserialization support - /// @{ - - private: - /*! - @note Some code in the switch cases has been copied, because otherwise - copilers would complain about implicit fallthrough and there is no - portable attribute to mute such warnings. 
- */ - template - static void add_to_vector(std::vector& vec, size_t bytes, const T number) - { - assert(bytes == 1 or bytes == 2 or bytes == 4 or bytes == 8); - - switch (bytes) - { - case 8: - { - vec.push_back(static_cast((static_cast(number) >> 070) & 0xff)); - vec.push_back(static_cast((static_cast(number) >> 060) & 0xff)); - vec.push_back(static_cast((static_cast(number) >> 050) & 0xff)); - vec.push_back(static_cast((static_cast(number) >> 040) & 0xff)); - vec.push_back(static_cast((number >> 030) & 0xff)); - vec.push_back(static_cast((number >> 020) & 0xff)); - vec.push_back(static_cast((number >> 010) & 0xff)); - vec.push_back(static_cast(number & 0xff)); - break; - } - - case 4: - { - vec.push_back(static_cast((number >> 030) & 0xff)); - vec.push_back(static_cast((number >> 020) & 0xff)); - vec.push_back(static_cast((number >> 010) & 0xff)); - vec.push_back(static_cast(number & 0xff)); - break; - } - - case 2: - { - vec.push_back(static_cast((number >> 010) & 0xff)); - vec.push_back(static_cast(number & 0xff)); - break; - } - - case 1: - { - vec.push_back(static_cast(number & 0xff)); - break; - } - } - } - - /*! - @brief take sufficient bytes from a vector to fill an integer variable - - In the context of binary serialization formats, we need to read several - bytes from a byte vector and combine them to multi-byte integral data - types. - - @param[in] vec byte vector to read from - @param[in] current_index the position in the vector after which to read - - @return the next sizeof(T) bytes from @a vec, in reverse order as T - - @tparam T the integral return type - - @throw parse_error.110 if there are less than sizeof(T)+1 bytes in the - vector @a vec to read - - In the for loop, the bytes from the vector are copied in reverse order into - the return value. In the figures below, let sizeof(T)=4 and `i` be the loop - variable. 
- - Precondition: - - vec: | | | a | b | c | d | T: | | | | | - ^ ^ ^ ^ - current_index i ptr sizeof(T) - - Postcondition: - - vec: | | | a | b | c | d | T: | d | c | b | a | - ^ ^ ^ - | i ptr - current_index - - @sa Code adapted from . - */ - template - static T get_from_vector(const std::vector& vec, const size_t current_index) - { - // check if we can read sizeof(T) bytes starting the next index - check_length(vec.size(), sizeof(T), current_index + 1); - - T result; - auto* ptr = reinterpret_cast(&result); - for (size_t i = 0; i < sizeof(T); ++i) - { - *ptr++ = vec[current_index + sizeof(T) - i]; - } - return result; - } - - /*! - @brief create a MessagePack serialization of a given JSON value - - This is a straightforward implementation of the MessagePack specification. - - @param[in] j JSON value to serialize - @param[in,out] v byte vector to write the serialization to - - @sa https://github.com/msgpack/msgpack/blob/master/spec.md - */ - static void to_msgpack_internal(const basic_json& j, std::vector& v) - { - switch (j.type()) - { - case value_t::null: - { - // nil - v.push_back(0xc0); - break; - } - - case value_t::boolean: - { - // true and false - v.push_back(j.m_value.boolean ? 0xc3 : 0xc2); - break; - } - - case value_t::number_integer: - { - if (j.m_value.number_integer >= 0) - { - // MessagePack does not differentiate between positive - // signed integers and unsigned integers. Therefore, we - // used the code from the value_t::number_unsigned case - // here. 
- if (j.m_value.number_unsigned < 128) - { - // positive fixnum - add_to_vector(v, 1, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 8 - v.push_back(0xcc); - add_to_vector(v, 1, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 16 - v.push_back(0xcd); - add_to_vector(v, 2, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 32 - v.push_back(0xce); - add_to_vector(v, 4, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 64 - v.push_back(0xcf); - add_to_vector(v, 8, j.m_value.number_unsigned); - } - } - else - { - if (j.m_value.number_integer >= -32) - { - // negative fixnum - add_to_vector(v, 1, j.m_value.number_integer); - } - else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) - { - // int 8 - v.push_back(0xd0); - add_to_vector(v, 1, j.m_value.number_integer); - } - else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) - { - // int 16 - v.push_back(0xd1); - add_to_vector(v, 2, j.m_value.number_integer); - } - else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) - { - // int 32 - v.push_back(0xd2); - add_to_vector(v, 4, j.m_value.number_integer); - } - else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) - { - // int 64 - v.push_back(0xd3); - add_to_vector(v, 8, j.m_value.number_integer); - } - } - break; - } - - case value_t::number_unsigned: - { - if (j.m_value.number_unsigned < 128) - { - // positive fixnum - add_to_vector(v, 1, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= 
(std::numeric_limits::max)()) - { - // uint 8 - v.push_back(0xcc); - add_to_vector(v, 1, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 16 - v.push_back(0xcd); - add_to_vector(v, 2, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 32 - v.push_back(0xce); - add_to_vector(v, 4, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 64 - v.push_back(0xcf); - add_to_vector(v, 8, j.m_value.number_unsigned); - } - break; - } - - case value_t::number_float: - { - // float 64 - v.push_back(0xcb); - const auto* helper = reinterpret_cast(&(j.m_value.number_float)); - for (size_t i = 0; i < 8; ++i) - { - v.push_back(helper[7 - i]); - } - break; - } - - case value_t::string: - { - const auto N = j.m_value.string->size(); - if (N <= 31) - { - // fixstr - v.push_back(static_cast(0xa0 | N)); - } - else if (N <= 255) - { - // str 8 - v.push_back(0xd9); - add_to_vector(v, 1, N); - } - else if (N <= 65535) - { - // str 16 - v.push_back(0xda); - add_to_vector(v, 2, N); - } - else if (N <= 4294967295) - { - // str 32 - v.push_back(0xdb); - add_to_vector(v, 4, N); - } - - // append string - std::copy(j.m_value.string->begin(), j.m_value.string->end(), - std::back_inserter(v)); - break; - } - - case value_t::array: - { - const auto N = j.m_value.array->size(); - if (N <= 15) - { - // fixarray - v.push_back(static_cast(0x90 | N)); - } - else if (N <= 0xffff) - { - // array 16 - v.push_back(0xdc); - add_to_vector(v, 2, N); - } - else if (N <= 0xffffffff) - { - // array 32 - v.push_back(0xdd); - add_to_vector(v, 4, N); - } - - // append each element - for (const auto& el : *j.m_value.array) - { - to_msgpack_internal(el, v); - } - break; - } - - case value_t::object: - { - const auto N = j.m_value.object->size(); - if (N <= 15) - { - // fixmap - v.push_back(static_cast(0x80 | (N & 0xf))); - } - 
else if (N <= 65535) - { - // map 16 - v.push_back(0xde); - add_to_vector(v, 2, N); - } - else if (N <= 4294967295) - { - // map 32 - v.push_back(0xdf); - add_to_vector(v, 4, N); - } - - // append each element - for (const auto& el : *j.m_value.object) - { - to_msgpack_internal(el.first, v); - to_msgpack_internal(el.second, v); - } - break; - } - - default: - { - break; - } - } - } - - /*! - @brief create a CBOR serialization of a given JSON value - - This is a straightforward implementation of the CBOR specification. - - @param[in] j JSON value to serialize - @param[in,out] v byte vector to write the serialization to - - @sa https://tools.ietf.org/html/rfc7049 - */ - static void to_cbor_internal(const basic_json& j, std::vector& v) - { - switch (j.type()) - { - case value_t::null: - { - v.push_back(0xf6); - break; - } - - case value_t::boolean: - { - v.push_back(j.m_value.boolean ? 0xf5 : 0xf4); - break; - } - - case value_t::number_integer: - { - if (j.m_value.number_integer >= 0) - { - // CBOR does not differentiate between positive signed - // integers and unsigned integers. Therefore, we used the - // code from the value_t::number_unsigned case here. 
- if (j.m_value.number_integer <= 0x17) - { - add_to_vector(v, 1, j.m_value.number_integer); - } - else if (j.m_value.number_integer <= (std::numeric_limits::max)()) - { - v.push_back(0x18); - // one-byte uint8_t - add_to_vector(v, 1, j.m_value.number_integer); - } - else if (j.m_value.number_integer <= (std::numeric_limits::max)()) - { - v.push_back(0x19); - // two-byte uint16_t - add_to_vector(v, 2, j.m_value.number_integer); - } - else if (j.m_value.number_integer <= (std::numeric_limits::max)()) - { - v.push_back(0x1a); - // four-byte uint32_t - add_to_vector(v, 4, j.m_value.number_integer); - } - else - { - v.push_back(0x1b); - // eight-byte uint64_t - add_to_vector(v, 8, j.m_value.number_integer); - } - } - else - { - // The conversions below encode the sign in the first - // byte, and the value is converted to a positive number. - const auto positive_number = -1 - j.m_value.number_integer; - if (j.m_value.number_integer >= -24) - { - v.push_back(static_cast(0x20 + positive_number)); - } - else if (positive_number <= (std::numeric_limits::max)()) - { - // int 8 - v.push_back(0x38); - add_to_vector(v, 1, positive_number); - } - else if (positive_number <= (std::numeric_limits::max)()) - { - // int 16 - v.push_back(0x39); - add_to_vector(v, 2, positive_number); - } - else if (positive_number <= (std::numeric_limits::max)()) - { - // int 32 - v.push_back(0x3a); - add_to_vector(v, 4, positive_number); - } - else - { - // int 64 - v.push_back(0x3b); - add_to_vector(v, 8, positive_number); - } - } - break; - } - - case value_t::number_unsigned: - { - if (j.m_value.number_unsigned <= 0x17) - { - v.push_back(static_cast(j.m_value.number_unsigned)); - } - else if (j.m_value.number_unsigned <= 0xff) - { - v.push_back(0x18); - // one-byte uint8_t - add_to_vector(v, 1, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= 0xffff) - { - v.push_back(0x19); - // two-byte uint16_t - add_to_vector(v, 2, j.m_value.number_unsigned); - } - else if 
(j.m_value.number_unsigned <= 0xffffffff) - { - v.push_back(0x1a); - // four-byte uint32_t - add_to_vector(v, 4, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= 0xffffffffffffffff) - { - v.push_back(0x1b); - // eight-byte uint64_t - add_to_vector(v, 8, j.m_value.number_unsigned); - } - break; - } - - case value_t::number_float: - { - // Double-Precision Float - v.push_back(0xfb); - const auto* helper = reinterpret_cast(&(j.m_value.number_float)); - for (size_t i = 0; i < 8; ++i) - { - v.push_back(helper[7 - i]); - } - break; - } - - case value_t::string: - { - const auto N = j.m_value.string->size(); - if (N <= 0x17) - { - v.push_back(static_cast(0x60 + N)); // 1 byte for string + size - } - else if (N <= 0xff) - { - v.push_back(0x78); // one-byte uint8_t for N - add_to_vector(v, 1, N); - } - else if (N <= 0xffff) - { - v.push_back(0x79); // two-byte uint16_t for N - add_to_vector(v, 2, N); - } - else if (N <= 0xffffffff) - { - v.push_back(0x7a); // four-byte uint32_t for N - add_to_vector(v, 4, N); - } - // LCOV_EXCL_START - else if (N <= 0xffffffffffffffff) - { - v.push_back(0x7b); // eight-byte uint64_t for N - add_to_vector(v, 8, N); - } - // LCOV_EXCL_STOP - - // append string - std::copy(j.m_value.string->begin(), j.m_value.string->end(), - std::back_inserter(v)); - break; - } - - case value_t::array: - { - const auto N = j.m_value.array->size(); - if (N <= 0x17) - { - v.push_back(static_cast(0x80 + N)); // 1 byte for array + size - } - else if (N <= 0xff) - { - v.push_back(0x98); // one-byte uint8_t for N - add_to_vector(v, 1, N); - } - else if (N <= 0xffff) - { - v.push_back(0x99); // two-byte uint16_t for N - add_to_vector(v, 2, N); - } - else if (N <= 0xffffffff) - { - v.push_back(0x9a); // four-byte uint32_t for N - add_to_vector(v, 4, N); - } - // LCOV_EXCL_START - else if (N <= 0xffffffffffffffff) - { - v.push_back(0x9b); // eight-byte uint64_t for N - add_to_vector(v, 8, N); - } - // LCOV_EXCL_STOP - - // append each element - 
for (const auto& el : *j.m_value.array) - { - to_cbor_internal(el, v); - } - break; - } - - case value_t::object: - { - const auto N = j.m_value.object->size(); - if (N <= 0x17) - { - v.push_back(static_cast(0xa0 + N)); // 1 byte for object + size - } - else if (N <= 0xff) - { - v.push_back(0xb8); - add_to_vector(v, 1, N); // one-byte uint8_t for N - } - else if (N <= 0xffff) - { - v.push_back(0xb9); - add_to_vector(v, 2, N); // two-byte uint16_t for N - } - else if (N <= 0xffffffff) - { - v.push_back(0xba); - add_to_vector(v, 4, N); // four-byte uint32_t for N - } - // LCOV_EXCL_START - else if (N <= 0xffffffffffffffff) - { - v.push_back(0xbb); - add_to_vector(v, 8, N); // eight-byte uint64_t for N - } - // LCOV_EXCL_STOP - - // append each element - for (const auto& el : *j.m_value.object) - { - to_cbor_internal(el.first, v); - to_cbor_internal(el.second, v); - } - break; - } - - default: - { - break; - } - } - } - - - /* - @brief checks if given lengths do not exceed the size of a given vector - - To secure the access to the byte vector during CBOR/MessagePack - deserialization, bytes are copied from the vector into buffers. This - function checks if the number of bytes to copy (@a len) does not exceed - the size @s size of the vector. Additionally, an @a offset is given from - where to start reading the bytes. 
- - This function checks whether reading the bytes is safe; that is, offset is - a valid index in the vector, offset+len - - @param[in] size size of the byte vector - @param[in] len number of bytes to read - @param[in] offset offset where to start reading - - vec: x x x x x X X X X X - ^ ^ ^ - 0 offset len - - @throws out_of_range if `len > v.size()` - */ - static void check_length(const size_t size, const size_t len, const size_t offset) - { - // simple case: requested length is greater than the vector's length - if (len > size or offset > size) - { - JSON_THROW(parse_error(110, offset + 1, "cannot read " + std::to_string(len) + " bytes from vector")); - } - - // second case: adding offset would result in overflow - if ((size > ((std::numeric_limits::max)() - offset))) - { - JSON_THROW(parse_error(110, offset + 1, "cannot read " + std::to_string(len) + " bytes from vector")); - } - - // last case: reading past the end of the vector - if (len + offset > size) - { - JSON_THROW(parse_error(110, offset + 1, "cannot read " + std::to_string(len) + " bytes from vector")); - } - } - - /*! - @brief check if the next byte belongs to a string - - While parsing a map, the keys must be strings. This function checks if the - current byte is one of the start bytes for a string in MessagePack: - - - 0xa0 - 0xbf: fixstr - - 0xd9: str 8 - - 0xda: str 16 - - 0xdb: str 32 - - @param[in] v MessagePack serialization - @param[in] idx byte index in @a v to check for a string - - @throw parse_error.113 if `v[idx]` does not belong to a string - */ - static void msgpack_expect_string(const std::vector& v, size_t idx) - { - check_length(v.size(), 1, idx); - - const auto byte = v[idx]; - if ((byte >= 0xa0 and byte <= 0xbf) or (byte >= 0xd9 and byte <= 0xdb)) - { - return; - } - - std::stringstream ss; - ss << std::hex << static_cast(v[idx]); - JSON_THROW(parse_error(113, idx + 1, "expected a MessagePack string; last byte: 0x" + ss.str())); - } - - /*! 
- @brief check if the next byte belongs to a string - - While parsing a map, the keys must be strings. This function checks if the - current byte is one of the start bytes for a string in CBOR: - - - 0x60 - 0x77: fixed length - - 0x78 - 0x7b: variable length - - 0x7f: indefinity length - - @param[in] v CBOR serialization - @param[in] idx byte index in @a v to check for a string - - @throw parse_error.113 if `v[idx]` does not belong to a string - */ - static void cbor_expect_string(const std::vector& v, size_t idx) - { - check_length(v.size(), 1, idx); - - const auto byte = v[idx]; - if ((byte >= 0x60 and byte <= 0x7b) or byte == 0x7f) - { - return; - } - - std::stringstream ss; - ss << std::hex << static_cast(v[idx]); - JSON_THROW(parse_error(113, idx + 1, "expected a CBOR string; last byte: 0x" + ss.str())); - } - - /*! - @brief create a JSON value from a given MessagePack vector - - @param[in] v MessagePack serialization - @param[in] idx byte index to start reading from @a v - - @return deserialized JSON value - - @throw parse_error.110 if the given vector ends prematurely - @throw parse_error.112 if unsupported features from MessagePack were - used in the given vector @a v or if the input is not valid MessagePack - @throw parse_error.113 if a string was expected as map key, but not found - - @sa https://github.com/msgpack/msgpack/blob/master/spec.md - */ - static basic_json from_msgpack_internal(const std::vector& v, size_t& idx) - { - // store and increment index - const size_t current_idx = idx++; - - // make sure reading 1 byte is safe - check_length(v.size(), 1, current_idx); - - if (v[current_idx] <= 0xbf) - { - if (v[current_idx] <= 0x7f) // positive fixint - { - return v[current_idx]; - } - if (v[current_idx] <= 0x8f) // fixmap - { - basic_json result = value_t::object; - const size_t len = v[current_idx] & 0x0f; - for (size_t i = 0; i < len; ++i) - { - msgpack_expect_string(v, idx); - std::string key = from_msgpack_internal(v, idx); - result[key] = 
from_msgpack_internal(v, idx); - } - return result; - } - else if (v[current_idx] <= 0x9f) // fixarray - { - basic_json result = value_t::array; - const size_t len = v[current_idx] & 0x0f; - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_msgpack_internal(v, idx)); - } - return result; - } - else // fixstr - { - const size_t len = v[current_idx] & 0x1f; - const size_t offset = current_idx + 1; - idx += len; // skip content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - } - else if (v[current_idx] >= 0xe0) // negative fixint - { - return static_cast(v[current_idx]); - } - else - { - switch (v[current_idx]) - { - case 0xc0: // nil - { - return value_t::null; - } - - case 0xc2: // false - { - return false; - } - - case 0xc3: // true - { - return true; - } - - case 0xca: // float 32 - { - // copy bytes in reverse order into the double variable - float res; - check_length(v.size(), sizeof(float), current_idx + 1); - for (size_t byte = 0; byte < sizeof(float); ++byte) - { - reinterpret_cast(&res)[sizeof(float) - byte - 1] = v[current_idx + 1 + byte]; - } - idx += sizeof(float); // skip content bytes - return res; - } - - case 0xcb: // float 64 - { - // copy bytes in reverse order into the double variable - double res; - check_length(v.size(), sizeof(double), current_idx + 1); - for (size_t byte = 0; byte < sizeof(double); ++byte) - { - reinterpret_cast(&res)[sizeof(double) - byte - 1] = v[current_idx + 1 + byte]; - } - idx += sizeof(double); // skip content bytes - return res; - } - - case 0xcc: // uint 8 - { - idx += 1; // skip content byte - return get_from_vector(v, current_idx); - } - - case 0xcd: // uint 16 - { - idx += 2; // skip 2 content bytes - return get_from_vector(v, current_idx); - } - - case 0xce: // uint 32 - { - idx += 4; // skip 4 content bytes - return get_from_vector(v, current_idx); - } - - case 0xcf: // uint 64 - { - idx += 8; // skip 8 content bytes - return 
get_from_vector(v, current_idx); - } - - case 0xd0: // int 8 - { - idx += 1; // skip content byte - return get_from_vector(v, current_idx); - } - - case 0xd1: // int 16 - { - idx += 2; // skip 2 content bytes - return get_from_vector(v, current_idx); - } - - case 0xd2: // int 32 - { - idx += 4; // skip 4 content bytes - return get_from_vector(v, current_idx); - } - - case 0xd3: // int 64 - { - idx += 8; // skip 8 content bytes - return get_from_vector(v, current_idx); - } - - case 0xd9: // str 8 - { - const auto len = static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 2; - idx += len + 1; // skip size byte + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0xda: // str 16 - { - const auto len = static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 3; - idx += len + 2; // skip 2 size bytes + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0xdb: // str 32 - { - const auto len = static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 5; - idx += len + 4; // skip 4 size bytes + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0xdc: // array 16 - { - basic_json result = value_t::array; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 2; // skip 2 size bytes - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_msgpack_internal(v, idx)); - } - return result; - } - - case 0xdd: // array 32 - { - basic_json result = value_t::array; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 4; // skip 4 size bytes - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_msgpack_internal(v, idx)); - } - return result; - } - - case 0xde: // map 16 - { - basic_json result = value_t::object; - 
const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 2; // skip 2 size bytes - for (size_t i = 0; i < len; ++i) - { - msgpack_expect_string(v, idx); - std::string key = from_msgpack_internal(v, idx); - result[key] = from_msgpack_internal(v, idx); - } - return result; - } - - case 0xdf: // map 32 - { - basic_json result = value_t::object; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 4; // skip 4 size bytes - for (size_t i = 0; i < len; ++i) - { - msgpack_expect_string(v, idx); - std::string key = from_msgpack_internal(v, idx); - result[key] = from_msgpack_internal(v, idx); - } - return result; - } - - default: - { - std::stringstream ss; - ss << std::hex << static_cast(v[current_idx]); - JSON_THROW(parse_error(112, current_idx + 1, "error reading MessagePack; last byte: 0x" + ss.str())); - } - } - } - } - - /*! - @brief create a JSON value from a given CBOR vector - - @param[in] v CBOR serialization - @param[in] idx byte index to start reading from @a v - - @return deserialized JSON value - - @throw parse_error.110 if the given vector ends prematurely - @throw parse_error.112 if unsupported features from CBOR were - used in the given vector @a v or if the input is not valid CBOR - @throw parse_error.113 if a string was expected as map key, but not found - - @sa https://tools.ietf.org/html/rfc7049 - */ - static basic_json from_cbor_internal(const std::vector& v, size_t& idx) - { - // store and increment index - const size_t current_idx = idx++; - - // make sure reading 1 byte is safe - check_length(v.size(), 1, current_idx); - - switch (v[current_idx]) - { - // Integer 0x00..0x17 (0..23) - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x08: - case 0x09: - case 0x0a: - case 0x0b: - case 0x0c: - case 0x0d: - case 0x0e: - case 0x0f: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - { - return 
v[current_idx]; - } - - case 0x18: // Unsigned integer (one-byte uint8_t follows) - { - idx += 1; // skip content byte - return get_from_vector(v, current_idx); - } - - case 0x19: // Unsigned integer (two-byte uint16_t follows) - { - idx += 2; // skip 2 content bytes - return get_from_vector(v, current_idx); - } - - case 0x1a: // Unsigned integer (four-byte uint32_t follows) - { - idx += 4; // skip 4 content bytes - return get_from_vector(v, current_idx); - } - - case 0x1b: // Unsigned integer (eight-byte uint64_t follows) - { - idx += 8; // skip 8 content bytes - return get_from_vector(v, current_idx); - } - - // Negative integer -1-0x00..-1-0x17 (-1..-24) - case 0x20: - case 0x21: - case 0x22: - case 0x23: - case 0x24: - case 0x25: - case 0x26: - case 0x27: - case 0x28: - case 0x29: - case 0x2a: - case 0x2b: - case 0x2c: - case 0x2d: - case 0x2e: - case 0x2f: - case 0x30: - case 0x31: - case 0x32: - case 0x33: - case 0x34: - case 0x35: - case 0x36: - case 0x37: - { - return static_cast(0x20 - 1 - v[current_idx]); - } - - case 0x38: // Negative integer (one-byte uint8_t follows) - { - idx += 1; // skip content byte - // must be uint8_t ! 
- return static_cast(-1) - get_from_vector(v, current_idx); - } - - case 0x39: // Negative integer -1-n (two-byte uint16_t follows) - { - idx += 2; // skip 2 content bytes - return static_cast(-1) - get_from_vector(v, current_idx); - } - - case 0x3a: // Negative integer -1-n (four-byte uint32_t follows) - { - idx += 4; // skip 4 content bytes - return static_cast(-1) - get_from_vector(v, current_idx); - } - - case 0x3b: // Negative integer -1-n (eight-byte uint64_t follows) - { - idx += 8; // skip 8 content bytes - return static_cast(-1) - static_cast(get_from_vector(v, current_idx)); - } - - // UTF-8 string (0x00..0x17 bytes follow) - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6a: - case 0x6b: - case 0x6c: - case 0x6d: - case 0x6e: - case 0x6f: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - { - const auto len = static_cast(v[current_idx] - 0x60); - const size_t offset = current_idx + 1; - idx += len; // skip content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - { - const auto len = static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 2; - idx += len + 1; // skip size byte + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - { - const auto len = static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 3; - idx += len + 2; // skip 2 size bytes + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0x7a: // UTF-8 string (four-byte uint32_t for n follow) - { - const auto len = static_cast(get_from_vector(v, current_idx)); 
- const size_t offset = current_idx + 5; - idx += len + 4; // skip 4 size bytes + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0x7b: // UTF-8 string (eight-byte uint64_t for n follow) - { - const auto len = static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 9; - idx += len + 8; // skip 8 size bytes + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0x7f: // UTF-8 string (indefinite length) - { - std::string result; - while (check_length(v.size(), 1, idx), v[idx] != 0xff) - { - string_t s = from_cbor_internal(v, idx); - result += s; - } - // skip break byte (0xFF) - idx += 1; - return result; - } - - // array (0x00..0x17 data items follow) - case 0x80: - case 0x81: - case 0x82: - case 0x83: - case 0x84: - case 0x85: - case 0x86: - case 0x87: - case 0x88: - case 0x89: - case 0x8a: - case 0x8b: - case 0x8c: - case 0x8d: - case 0x8e: - case 0x8f: - case 0x90: - case 0x91: - case 0x92: - case 0x93: - case 0x94: - case 0x95: - case 0x96: - case 0x97: - { - basic_json result = value_t::array; - const auto len = static_cast(v[current_idx] - 0x80); - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_cbor_internal(v, idx)); - } - return result; - } - - case 0x98: // array (one-byte uint8_t for n follows) - { - basic_json result = value_t::array; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 1; // skip 1 size byte - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_cbor_internal(v, idx)); - } - return result; - } - - case 0x99: // array (two-byte uint16_t for n follow) - { - basic_json result = value_t::array; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 2; // skip 4 size bytes - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_cbor_internal(v, idx)); - } - return result; - } - - case 0x9a: 
// array (four-byte uint32_t for n follow) - { - basic_json result = value_t::array; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 4; // skip 4 size bytes - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_cbor_internal(v, idx)); - } - return result; - } - - case 0x9b: // array (eight-byte uint64_t for n follow) - { - basic_json result = value_t::array; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 8; // skip 8 size bytes - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_cbor_internal(v, idx)); - } - return result; - } - - case 0x9f: // array (indefinite length) - { - basic_json result = value_t::array; - while (check_length(v.size(), 1, idx), v[idx] != 0xff) - { - result.push_back(from_cbor_internal(v, idx)); - } - // skip break byte (0xFF) - idx += 1; - return result; - } - - // map (0x00..0x17 pairs of data items follow) - case 0xa0: - case 0xa1: - case 0xa2: - case 0xa3: - case 0xa4: - case 0xa5: - case 0xa6: - case 0xa7: - case 0xa8: - case 0xa9: - case 0xaa: - case 0xab: - case 0xac: - case 0xad: - case 0xae: - case 0xaf: - case 0xb0: - case 0xb1: - case 0xb2: - case 0xb3: - case 0xb4: - case 0xb5: - case 0xb6: - case 0xb7: - { - basic_json result = value_t::object; - const auto len = static_cast(v[current_idx] - 0xa0); - for (size_t i = 0; i < len; ++i) - { - cbor_expect_string(v, idx); - std::string key = from_cbor_internal(v, idx); - result[key] = from_cbor_internal(v, idx); - } - return result; - } - - case 0xb8: // map (one-byte uint8_t for n follows) - { - basic_json result = value_t::object; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 1; // skip 1 size byte - for (size_t i = 0; i < len; ++i) - { - cbor_expect_string(v, idx); - std::string key = from_cbor_internal(v, idx); - result[key] = from_cbor_internal(v, idx); - } - return result; - } - - case 0xb9: // map (two-byte uint16_t for n follow) - { - basic_json result = value_t::object; - const 
auto len = static_cast(get_from_vector(v, current_idx)); - idx += 2; // skip 2 size bytes - for (size_t i = 0; i < len; ++i) - { - cbor_expect_string(v, idx); - std::string key = from_cbor_internal(v, idx); - result[key] = from_cbor_internal(v, idx); - } - return result; - } - - case 0xba: // map (four-byte uint32_t for n follow) - { - basic_json result = value_t::object; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 4; // skip 4 size bytes - for (size_t i = 0; i < len; ++i) - { - cbor_expect_string(v, idx); - std::string key = from_cbor_internal(v, idx); - result[key] = from_cbor_internal(v, idx); - } - return result; - } - - case 0xbb: // map (eight-byte uint64_t for n follow) - { - basic_json result = value_t::object; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 8; // skip 8 size bytes - for (size_t i = 0; i < len; ++i) - { - cbor_expect_string(v, idx); - std::string key = from_cbor_internal(v, idx); - result[key] = from_cbor_internal(v, idx); - } - return result; - } - - case 0xbf: // map (indefinite length) - { - basic_json result = value_t::object; - while (check_length(v.size(), 1, idx), v[idx] != 0xff) - { - cbor_expect_string(v, idx); - std::string key = from_cbor_internal(v, idx); - result[key] = from_cbor_internal(v, idx); - } - // skip break byte (0xFF) - idx += 1; - return result; - } - - case 0xf4: // false - { - return false; - } - - case 0xf5: // true - { - return true; - } - - case 0xf6: // null - { - return value_t::null; - } - - case 0xf9: // Half-Precision Float (two-byte IEEE 754) - { - idx += 2; // skip two content bytes - - // code from RFC 7049, Appendix D, Figure 3: - // As half-precision floating-point numbers were only added to - // IEEE 754 in 2008, today's programming platforms often still - // only have limited support for them. It is very easy to - // include at least decoding support for them even without such - // support. 
An example of a small decoder for half-precision - // floating-point numbers in the C language is shown in Fig. 3. - check_length(v.size(), 2, current_idx + 1); - const int half = (v[current_idx + 1] << 8) + v[current_idx + 2]; - const int exp = (half >> 10) & 0x1f; - const int mant = half & 0x3ff; - double val; - if (exp == 0) - { - val = std::ldexp(mant, -24); - } - else if (exp != 31) - { - val = std::ldexp(mant + 1024, exp - 25); - } - else - { - val = mant == 0 - ? std::numeric_limits::infinity() - : std::numeric_limits::quiet_NaN(); - } - return (half & 0x8000) != 0 ? -val : val; - } - - case 0xfa: // Single-Precision Float (four-byte IEEE 754) - { - // copy bytes in reverse order into the float variable - float res; - check_length(v.size(), sizeof(float), current_idx + 1); - for (size_t byte = 0; byte < sizeof(float); ++byte) - { - reinterpret_cast(&res)[sizeof(float) - byte - 1] = v[current_idx + 1 + byte]; - } - idx += sizeof(float); // skip content bytes - return res; - } - - case 0xfb: // Double-Precision Float (eight-byte IEEE 754) - { - // copy bytes in reverse order into the double variable - double res; - check_length(v.size(), sizeof(double), current_idx + 1); - for (size_t byte = 0; byte < sizeof(double); ++byte) - { - reinterpret_cast(&res)[sizeof(double) - byte - 1] = v[current_idx + 1 + byte]; - } - idx += sizeof(double); // skip content bytes - return res; - } - - default: // anything else (0xFF is handled inside the other types) - { - std::stringstream ss; - ss << std::hex << static_cast(v[current_idx]); - JSON_THROW(parse_error(112, current_idx + 1, "error reading CBOR; last byte: 0x" + ss.str())); - } - } - } - - public: - /*! - @brief create a MessagePack serialization of a given JSON value - - Serializes a given JSON value @a j to a byte vector using the MessagePack - serialization format. MessagePack is a binary serialization format which - aims to be more compact than JSON itself, yet more efficient to parse. 
- - The library uses the following mapping from JSON values types to - MessagePack types according to the MessagePack specification: - - JSON value type | value/range | MessagePack type | first byte - --------------- | --------------------------------- | ---------------- | ---------- - null | `null` | nil | 0xc0 - boolean | `true` | true | 0xc3 - boolean | `false` | false | 0xc2 - number_integer | -9223372036854775808..-2147483649 | int64 | 0xd3 - number_integer | -2147483648..-32769 | int32 | 0xd2 - number_integer | -32768..-129 | int16 | 0xd1 - number_integer | -128..-33 | int8 | 0xd0 - number_integer | -32..-1 | negative fixint | 0xe0..0xff - number_integer | 0..127 | positive fixint | 0x00..0x7f - number_integer | 128..255 | uint 8 | 0xcc - number_integer | 256..65535 | uint 16 | 0xcd - number_integer | 65536..4294967295 | uint 32 | 0xce - number_integer | 4294967296..18446744073709551615 | uint 64 | 0xcf - number_unsigned | 0..127 | positive fixint | 0x00..0x7f - number_unsigned | 128..255 | uint 8 | 0xcc - number_unsigned | 256..65535 | uint 16 | 0xcd - number_unsigned | 65536..4294967295 | uint 32 | 0xce - number_unsigned | 4294967296..18446744073709551615 | uint 64 | 0xcf - number_float | *any value* | float 64 | 0xcb - string | *length*: 0..31 | fixstr | 0xa0..0xbf - string | *length*: 32..255 | str 8 | 0xd9 - string | *length*: 256..65535 | str 16 | 0xda - string | *length*: 65536..4294967295 | str 32 | 0xdb - array | *size*: 0..15 | fixarray | 0x90..0x9f - array | *size*: 16..65535 | array 16 | 0xdc - array | *size*: 65536..4294967295 | array 32 | 0xdd - object | *size*: 0..15 | fix map | 0x80..0x8f - object | *size*: 16..65535 | map 16 | 0xde - object | *size*: 65536..4294967295 | map 32 | 0xdf - - @note The mapping is **complete** in the sense that any JSON value type - can be converted to a MessagePack value. 
- - @note The following values can **not** be converted to a MessagePack value: - - strings with more than 4294967295 bytes - - arrays with more than 4294967295 elements - - objects with more than 4294967295 elements - - @note The following MessagePack types are not used in the conversion: - - bin 8 - bin 32 (0xc4..0xc6) - - ext 8 - ext 32 (0xc7..0xc9) - - float 32 (0xca) - - fixext 1 - fixext 16 (0xd4..0xd8) - - @note Any MessagePack output created @ref to_msgpack can be successfully - parsed by @ref from_msgpack. - - @param[in] j JSON value to serialize - @return MessagePack serialization as byte vector - - @complexity Linear in the size of the JSON value @a j. - - @liveexample{The example shows the serialization of a JSON value to a byte - vector in MessagePack format.,to_msgpack} - - @sa http://msgpack.org - @sa @ref from_msgpack(const std::vector&, const size_t) for the - analogous deserialization - @sa @ref to_cbor(const basic_json& for the related CBOR format - - @since version 2.0.9 - */ - static std::vector to_msgpack(const basic_json& j) - { - std::vector result; - to_msgpack_internal(j, result); - return result; - } - - /*! - @brief create a JSON value from a byte vector in MessagePack format - - Deserializes a given byte vector @a v to a JSON value using the MessagePack - serialization format. 
- - The library maps MessagePack types to JSON value types as follows: - - MessagePack type | JSON value type | first byte - ---------------- | --------------- | ---------- - positive fixint | number_unsigned | 0x00..0x7f - fixmap | object | 0x80..0x8f - fixarray | array | 0x90..0x9f - fixstr | string | 0xa0..0xbf - nil | `null` | 0xc0 - false | `false` | 0xc2 - true | `true` | 0xc3 - float 32 | number_float | 0xca - float 64 | number_float | 0xcb - uint 8 | number_unsigned | 0xcc - uint 16 | number_unsigned | 0xcd - uint 32 | number_unsigned | 0xce - uint 64 | number_unsigned | 0xcf - int 8 | number_integer | 0xd0 - int 16 | number_integer | 0xd1 - int 32 | number_integer | 0xd2 - int 64 | number_integer | 0xd3 - str 8 | string | 0xd9 - str 16 | string | 0xda - str 32 | string | 0xdb - array 16 | array | 0xdc - array 32 | array | 0xdd - map 16 | object | 0xde - map 32 | object | 0xdf - negative fixint | number_integer | 0xe0-0xff - - @warning The mapping is **incomplete** in the sense that not all - MessagePack types can be converted to a JSON value. The following - MessagePack types are not supported and will yield parse errors: - - bin 8 - bin 32 (0xc4..0xc6) - - ext 8 - ext 32 (0xc7..0xc9) - - fixext 1 - fixext 16 (0xd4..0xd8) - - @note Any MessagePack output created @ref to_msgpack can be successfully - parsed by @ref from_msgpack. - - @param[in] v a byte vector in MessagePack format - @param[in] start_index the index to start reading from @a v (0 by default) - @return deserialized JSON value - - @throw parse_error.110 if the given vector ends prematurely - @throw parse_error.112 if unsupported features from MessagePack were - used in the given vector @a v or if the input is not valid MessagePack - @throw parse_error.113 if a string was expected as map key, but not found - - @complexity Linear in the size of the byte vector @a v. 
- - @liveexample{The example shows the deserialization of a byte vector in - MessagePack format to a JSON value.,from_msgpack} - - @sa http://msgpack.org - @sa @ref to_msgpack(const basic_json&) for the analogous serialization - @sa @ref from_cbor(const std::vector&, const size_t) for the - related CBOR format - - @since version 2.0.9, parameter @a start_index since 2.1.1 - */ - static basic_json from_msgpack(const std::vector& v, - const size_t start_index = 0) - { - size_t i = start_index; - return from_msgpack_internal(v, i); - } - - /*! - @brief create a MessagePack serialization of a given JSON value - - Serializes a given JSON value @a j to a byte vector using the CBOR (Concise - Binary Object Representation) serialization format. CBOR is a binary - serialization format which aims to be more compact than JSON itself, yet - more efficient to parse. - - The library uses the following mapping from JSON values types to - CBOR types according to the CBOR specification (RFC 7049): - - JSON value type | value/range | CBOR type | first byte - --------------- | ------------------------------------------ | ---------------------------------- | --------------- - null | `null` | Null | 0xf6 - boolean | `true` | True | 0xf5 - boolean | `false` | False | 0xf4 - number_integer | -9223372036854775808..-2147483649 | Negative integer (8 bytes follow) | 0x3b - number_integer | -2147483648..-32769 | Negative integer (4 bytes follow) | 0x3a - number_integer | -32768..-129 | Negative integer (2 bytes follow) | 0x39 - number_integer | -128..-25 | Negative integer (1 byte follow) | 0x38 - number_integer | -24..-1 | Negative integer | 0x20..0x37 - number_integer | 0..23 | Integer | 0x00..0x17 - number_integer | 24..255 | Unsigned integer (1 byte follow) | 0x18 - number_integer | 256..65535 | Unsigned integer (2 bytes follow) | 0x19 - number_integer | 65536..4294967295 | Unsigned integer (4 bytes follow) | 0x1a - number_integer | 4294967296..18446744073709551615 | Unsigned integer (8 
bytes follow) | 0x1b - number_unsigned | 0..23 | Integer | 0x00..0x17 - number_unsigned | 24..255 | Unsigned integer (1 byte follow) | 0x18 - number_unsigned | 256..65535 | Unsigned integer (2 bytes follow) | 0x19 - number_unsigned | 65536..4294967295 | Unsigned integer (4 bytes follow) | 0x1a - number_unsigned | 4294967296..18446744073709551615 | Unsigned integer (8 bytes follow) | 0x1b - number_float | *any value* | Double-Precision Float | 0xfb - string | *length*: 0..23 | UTF-8 string | 0x60..0x77 - string | *length*: 23..255 | UTF-8 string (1 byte follow) | 0x78 - string | *length*: 256..65535 | UTF-8 string (2 bytes follow) | 0x79 - string | *length*: 65536..4294967295 | UTF-8 string (4 bytes follow) | 0x7a - string | *length*: 4294967296..18446744073709551615 | UTF-8 string (8 bytes follow) | 0x7b - array | *size*: 0..23 | array | 0x80..0x97 - array | *size*: 23..255 | array (1 byte follow) | 0x98 - array | *size*: 256..65535 | array (2 bytes follow) | 0x99 - array | *size*: 65536..4294967295 | array (4 bytes follow) | 0x9a - array | *size*: 4294967296..18446744073709551615 | array (8 bytes follow) | 0x9b - object | *size*: 0..23 | map | 0xa0..0xb7 - object | *size*: 23..255 | map (1 byte follow) | 0xb8 - object | *size*: 256..65535 | map (2 bytes follow) | 0xb9 - object | *size*: 65536..4294967295 | map (4 bytes follow) | 0xba - object | *size*: 4294967296..18446744073709551615 | map (8 bytes follow) | 0xbb - - @note The mapping is **complete** in the sense that any JSON value type - can be converted to a CBOR value. 
- - @note The following CBOR types are not used in the conversion: - - byte strings (0x40..0x5f) - - UTF-8 strings terminated by "break" (0x7f) - - arrays terminated by "break" (0x9f) - - maps terminated by "break" (0xbf) - - date/time (0xc0..0xc1) - - bignum (0xc2..0xc3) - - decimal fraction (0xc4) - - bigfloat (0xc5) - - tagged items (0xc6..0xd4, 0xd8..0xdb) - - expected conversions (0xd5..0xd7) - - simple values (0xe0..0xf3, 0xf8) - - undefined (0xf7) - - half and single-precision floats (0xf9-0xfa) - - break (0xff) - - @param[in] j JSON value to serialize - @return MessagePack serialization as byte vector - - @complexity Linear in the size of the JSON value @a j. - - @liveexample{The example shows the serialization of a JSON value to a byte - vector in CBOR format.,to_cbor} - - @sa http://cbor.io - @sa @ref from_cbor(const std::vector&, const size_t) for the - analogous deserialization - @sa @ref to_msgpack(const basic_json& for the related MessagePack format - - @since version 2.0.9 - */ - static std::vector to_cbor(const basic_json& j) - { - std::vector result; - to_cbor_internal(j, result); - return result; - } - - /*! - @brief create a JSON value from a byte vector in CBOR format - - Deserializes a given byte vector @a v to a JSON value using the CBOR - (Concise Binary Object Representation) serialization format. 
- - The library maps CBOR types to JSON value types as follows: - - CBOR type | JSON value type | first byte - ---------------------- | --------------- | ---------- - Integer | number_unsigned | 0x00..0x17 - Unsigned integer | number_unsigned | 0x18 - Unsigned integer | number_unsigned | 0x19 - Unsigned integer | number_unsigned | 0x1a - Unsigned integer | number_unsigned | 0x1b - Negative integer | number_integer | 0x20..0x37 - Negative integer | number_integer | 0x38 - Negative integer | number_integer | 0x39 - Negative integer | number_integer | 0x3a - Negative integer | number_integer | 0x3b - Negative integer | number_integer | 0x40..0x57 - UTF-8 string | string | 0x60..0x77 - UTF-8 string | string | 0x78 - UTF-8 string | string | 0x79 - UTF-8 string | string | 0x7a - UTF-8 string | string | 0x7b - UTF-8 string | string | 0x7f - array | array | 0x80..0x97 - array | array | 0x98 - array | array | 0x99 - array | array | 0x9a - array | array | 0x9b - array | array | 0x9f - map | object | 0xa0..0xb7 - map | object | 0xb8 - map | object | 0xb9 - map | object | 0xba - map | object | 0xbb - map | object | 0xbf - False | `false` | 0xf4 - True | `true` | 0xf5 - Nill | `null` | 0xf6 - Half-Precision Float | number_float | 0xf9 - Single-Precision Float | number_float | 0xfa - Double-Precision Float | number_float | 0xfb - - @warning The mapping is **incomplete** in the sense that not all CBOR - types can be converted to a JSON value. The following CBOR types - are not supported and will yield parse errors (parse_error.112): - - byte strings (0x40..0x5f) - - date/time (0xc0..0xc1) - - bignum (0xc2..0xc3) - - decimal fraction (0xc4) - - bigfloat (0xc5) - - tagged items (0xc6..0xd4, 0xd8..0xdb) - - expected conversions (0xd5..0xd7) - - simple values (0xe0..0xf3, 0xf8) - - undefined (0xf7) - - @warning CBOR allows map keys of any type, whereas JSON only allows - strings as keys in object values. 
Therefore, CBOR maps with keys - other than UTF-8 strings are rejected (parse_error.113). - - @note Any CBOR output created @ref to_cbor can be successfully parsed by - @ref from_cbor. - - @param[in] v a byte vector in CBOR format - @param[in] start_index the index to start reading from @a v (0 by default) - @return deserialized JSON value - - @throw parse_error.110 if the given vector ends prematurely - @throw parse_error.112 if unsupported features from CBOR were - used in the given vector @a v or if the input is not valid CBOR - @throw parse_error.113 if a string was expected as map key, but not found - - @complexity Linear in the size of the byte vector @a v. - - @liveexample{The example shows the deserialization of a byte vector in CBOR - format to a JSON value.,from_cbor} - - @sa http://cbor.io - @sa @ref to_cbor(const basic_json&) for the analogous serialization - @sa @ref from_msgpack(const std::vector&, const size_t) for the - related MessagePack format - - @since version 2.0.9, parameter @a start_index since 2.1.1 - */ - static basic_json from_cbor(const std::vector& v, - const size_t start_index = 0) - { - size_t i = start_index; - return from_cbor_internal(v, i); - } - - /// @} - - /////////////////////////// - // convenience functions // - /////////////////////////// - - /*! - @brief return the type as string - - Returns the type name as string to be used in error messages - usually to - indicate that a function was called on a wrong JSON type. - - @return basically a string representation of a the @a m_type member - - @complexity Constant. 
- - @liveexample{The following code exemplifies `type_name()` for all JSON - types.,type_name} - - @since version 1.0.0, public since 2.1.0 - */ - std::string type_name() const - { - { - switch (m_type) - { - case value_t::null: - return "null"; - case value_t::object: - return "object"; - case value_t::array: - return "array"; - case value_t::string: - return "string"; - case value_t::boolean: - return "boolean"; - case value_t::discarded: - return "discarded"; - default: - return "number"; - } - } - } - - - private: - ////////////////////// - // member variables // - ////////////////////// - - /// the type of the current element - value_t m_type = value_t::null; - - /// the value of the current element - json_value m_value = {}; - - - private: - /////////////// - // iterators // - /////////////// - - /*! - @brief an iterator for primitive JSON types - - This class models an iterator for primitive JSON types (boolean, number, - string). It's only purpose is to allow the iterator/const_iterator classes - to "iterate" over primitive values. Internally, the iterator is modeled by - a `difference_type` variable. Value begin_value (`0`) models the begin, - end_value (`1`) models past the end. 
- */ - class primitive_iterator_t - { - public: - - difference_type get_value() const noexcept - { - return m_it; - } - /// set iterator to a defined beginning - void set_begin() noexcept - { - m_it = begin_value; - } - - /// set iterator to a defined past the end - void set_end() noexcept - { - m_it = end_value; - } - - /// return whether the iterator can be dereferenced - constexpr bool is_begin() const noexcept - { - return (m_it == begin_value); - } - - /// return whether the iterator is at end - constexpr bool is_end() const noexcept - { - return (m_it == end_value); - } - - friend constexpr bool operator==(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept - { - return lhs.m_it == rhs.m_it; - } - - friend constexpr bool operator!=(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept - { - return !(lhs == rhs); - } - - friend constexpr bool operator<(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept - { - return lhs.m_it < rhs.m_it; - } - - friend constexpr bool operator<=(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept - { - return lhs.m_it <= rhs.m_it; - } - - friend constexpr bool operator>(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept - { - return lhs.m_it > rhs.m_it; - } - - friend constexpr bool operator>=(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept - { - return lhs.m_it >= rhs.m_it; - } - - primitive_iterator_t operator+(difference_type i) - { - auto result = *this; - result += i; - return result; - } - - friend constexpr difference_type operator-(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept - { - return lhs.m_it - rhs.m_it; - } - - friend std::ostream& operator<<(std::ostream& os, primitive_iterator_t it) - { - return os << it.m_it; - } - - primitive_iterator_t& operator++() - { - ++m_it; - return *this; - } - - primitive_iterator_t operator++(int) - { - auto result = *this; - m_it++; - return result; - } - - primitive_iterator_t& operator--() - { - --m_it; - 
return *this; - } - - primitive_iterator_t operator--(int) - { - auto result = *this; - m_it--; - return result; - } - - primitive_iterator_t& operator+=(difference_type n) - { - m_it += n; - return *this; - } - - primitive_iterator_t& operator-=(difference_type n) - { - m_it -= n; - return *this; - } - - private: - static constexpr difference_type begin_value = 0; - static constexpr difference_type end_value = begin_value + 1; - - /// iterator as signed integer type - difference_type m_it = std::numeric_limits::denorm_min(); - }; - - /*! - @brief an iterator value - - @note This structure could easily be a union, but MSVC currently does not - allow unions members with complex constructors, see - https://github.com/nlohmann/json/pull/105. - */ - struct internal_iterator - { - /// iterator for JSON objects - typename object_t::iterator object_iterator; - /// iterator for JSON arrays - typename array_t::iterator array_iterator; - /// generic iterator for all other types - primitive_iterator_t primitive_iterator; - - /// create an uninitialized internal_iterator - internal_iterator() noexcept - : object_iterator(), array_iterator(), primitive_iterator() - {} - }; - - /// proxy class for the iterator_wrapper functions - template - class iteration_proxy - { - private: - /// helper class for iteration - class iteration_proxy_internal - { - private: - /// the iterator - IteratorType anchor; - /// an index for arrays (used to create key names) - size_t array_index = 0; - - public: - explicit iteration_proxy_internal(IteratorType it) noexcept - : anchor(it) - {} - - /// dereference operator (needed for range-based for) - iteration_proxy_internal& operator*() - { - return *this; - } - - /// increment operator (needed for range-based for) - iteration_proxy_internal& operator++() - { - ++anchor; - ++array_index; - - return *this; - } - - /// inequality operator (needed for range-based for) - bool operator!= (const iteration_proxy_internal& o) const - { - return anchor != 
o.anchor; - } - - /// return key of the iterator - typename basic_json::string_t key() const - { - assert(anchor.m_object != nullptr); - - switch (anchor.m_object->type()) - { - // use integer array index as key - case value_t::array: - { - return std::to_string(array_index); - } - - // use key from the object - case value_t::object: - { - return anchor.key(); - } - - // use an empty key for all primitive types - default: - { - return ""; - } - } - } - - /// return value of the iterator - typename IteratorType::reference value() const - { - return anchor.value(); - } - }; - - /// the container to iterate - typename IteratorType::reference container; - - public: - /// construct iteration proxy from a container - explicit iteration_proxy(typename IteratorType::reference cont) - : container(cont) - {} - - /// return iterator begin (needed for range-based for) - iteration_proxy_internal begin() noexcept - { - return iteration_proxy_internal(container.begin()); - } - - /// return iterator end (needed for range-based for) - iteration_proxy_internal end() noexcept - { - return iteration_proxy_internal(container.end()); - } - }; - - public: - /*! - @brief a template for a random access iterator for the @ref basic_json class - - This class implements a both iterators (iterator and const_iterator) for the - @ref basic_json class. - - @note An iterator is called *initialized* when a pointer to a JSON value - has been set (e.g., by a constructor or a copy assignment). If the - iterator is default-constructed, it is *uninitialized* and most - methods are undefined. **The library uses assertions to detect calls - on uninitialized iterators.** - - @requirement The class satisfies the following concept requirements: - - [RandomAccessIterator](http://en.cppreference.com/w/cpp/concept/RandomAccessIterator): - The iterator that can be moved to point (forward and backward) to any - element in constant time. 
- - @since version 1.0.0, simplified in version 2.0.9 - */ - template - class iter_impl : public std::iterator - { - /// allow basic_json to access private members - friend class basic_json; - - // make sure U is basic_json or const basic_json - static_assert(std::is_same::value - or std::is_same::value, - "iter_impl only accepts (const) basic_json"); - - public: - /// the type of the values when the iterator is dereferenced - using value_type = typename basic_json::value_type; - /// a type to represent differences between iterators - using difference_type = typename basic_json::difference_type; - /// defines a pointer to the type iterated over (value_type) - using pointer = typename std::conditional::value, - typename basic_json::const_pointer, - typename basic_json::pointer>::type; - /// defines a reference to the type iterated over (value_type) - using reference = typename std::conditional::value, - typename basic_json::const_reference, - typename basic_json::reference>::type; - /// the category of the iterator - using iterator_category = std::bidirectional_iterator_tag; - - /// default constructor - iter_impl() = default; - - /*! - @brief constructor for a given JSON instance - @param[in] object pointer to a JSON object for this iterator - @pre object != nullptr - @post The iterator is initialized; i.e. `m_object != nullptr`. - */ - explicit iter_impl(pointer object) noexcept - : m_object(object) - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - m_it.object_iterator = typename object_t::iterator(); - break; - } - - case basic_json::value_t::array: - { - m_it.array_iterator = typename array_t::iterator(); - break; - } - - default: - { - m_it.primitive_iterator = primitive_iterator_t(); - break; - } - } - } - - /* - Use operator `const_iterator` instead of `const_iterator(const iterator& - other) noexcept` to avoid two class definitions for @ref iterator and - @ref const_iterator. 
- - This function is only called if this class is an @ref iterator. If this - class is a @ref const_iterator this function is not called. - */ - operator const_iterator() const - { - const_iterator ret; - - if (m_object) - { - ret.m_object = m_object; - ret.m_it = m_it; - } - - return ret; - } - - /*! - @brief copy constructor - @param[in] other iterator to copy from - @note It is not checked whether @a other is initialized. - */ - iter_impl(const iter_impl& other) noexcept - : m_object(other.m_object), m_it(other.m_it) - {} - - /*! - @brief copy assignment - @param[in,out] other iterator to copy from - @note It is not checked whether @a other is initialized. - */ - iter_impl& operator=(iter_impl other) noexcept( - std::is_nothrow_move_constructible::value and - std::is_nothrow_move_assignable::value and - std::is_nothrow_move_constructible::value and - std::is_nothrow_move_assignable::value - ) - { - std::swap(m_object, other.m_object); - std::swap(m_it, other.m_it); - return *this; - } - - private: - /*! - @brief set the iterator to the first value - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - void set_begin() noexcept - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - m_it.object_iterator = m_object->m_value.object->begin(); - break; - } - - case basic_json::value_t::array: - { - m_it.array_iterator = m_object->m_value.array->begin(); - break; - } - - case basic_json::value_t::null: - { - // set to end so begin()==end() is true: null is empty - m_it.primitive_iterator.set_end(); - break; - } - - default: - { - m_it.primitive_iterator.set_begin(); - break; - } - } - } - - /*! - @brief set the iterator past the last value - @pre The iterator is initialized; i.e. `m_object != nullptr`. 
- */ - void set_end() noexcept - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - m_it.object_iterator = m_object->m_value.object->end(); - break; - } - - case basic_json::value_t::array: - { - m_it.array_iterator = m_object->m_value.array->end(); - break; - } - - default: - { - m_it.primitive_iterator.set_end(); - break; - } - } - } - - public: - /*! - @brief return a reference to the value pointed to by the iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - reference operator*() const - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - assert(m_it.object_iterator != m_object->m_value.object->end()); - return m_it.object_iterator->second; - } - - case basic_json::value_t::array: - { - assert(m_it.array_iterator != m_object->m_value.array->end()); - return *m_it.array_iterator; - } - - case basic_json::value_t::null: - { - JSON_THROW(invalid_iterator(214, "cannot get value")); - } - - default: - { - if (m_it.primitive_iterator.is_begin()) - { - return *m_object; - } - - JSON_THROW(invalid_iterator(214, "cannot get value")); - } - } - } - - /*! - @brief dereference the iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - pointer operator->() const - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - assert(m_it.object_iterator != m_object->m_value.object->end()); - return &(m_it.object_iterator->second); - } - - case basic_json::value_t::array: - { - assert(m_it.array_iterator != m_object->m_value.array->end()); - return &*m_it.array_iterator; - } - - default: - { - if (m_it.primitive_iterator.is_begin()) - { - return m_object; - } - - JSON_THROW(invalid_iterator(214, "cannot get value")); - } - } - } - - /*! - @brief post-increment (it++) - @pre The iterator is initialized; i.e. `m_object != nullptr`. 
- */ - iter_impl operator++(int) - { - auto result = *this; - ++(*this); - return result; - } - - /*! - @brief pre-increment (++it) - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl& operator++() - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - std::advance(m_it.object_iterator, 1); - break; - } - - case basic_json::value_t::array: - { - std::advance(m_it.array_iterator, 1); - break; - } - - default: - { - ++m_it.primitive_iterator; - break; - } - } - - return *this; - } - - /*! - @brief post-decrement (it--) - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl operator--(int) - { - auto result = *this; - --(*this); - return result; - } - - /*! - @brief pre-decrement (--it) - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl& operator--() - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - std::advance(m_it.object_iterator, -1); - break; - } - - case basic_json::value_t::array: - { - std::advance(m_it.array_iterator, -1); - break; - } - - default: - { - --m_it.primitive_iterator; - break; - } - } - - return *this; - } - - /*! - @brief comparison: equal - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - bool operator==(const iter_impl& other) const - { - // if objects are not the same, the comparison is undefined - if (m_object != other.m_object) - { - JSON_THROW(invalid_iterator(212, "cannot compare iterators of different containers")); - } - - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - return (m_it.object_iterator == other.m_it.object_iterator); - } - - case basic_json::value_t::array: - { - return (m_it.array_iterator == other.m_it.array_iterator); - } - - default: - { - return (m_it.primitive_iterator == other.m_it.primitive_iterator); - } - } - } - - /*! 
- @brief comparison: not equal - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - bool operator!=(const iter_impl& other) const - { - return not operator==(other); - } - - /*! - @brief comparison: smaller - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - bool operator<(const iter_impl& other) const - { - // if objects are not the same, the comparison is undefined - if (m_object != other.m_object) - { - JSON_THROW(invalid_iterator(212, "cannot compare iterators of different containers")); - } - - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - JSON_THROW(invalid_iterator(213, "cannot compare order of object iterators")); - } - - case basic_json::value_t::array: - { - return (m_it.array_iterator < other.m_it.array_iterator); - } - - default: - { - return (m_it.primitive_iterator < other.m_it.primitive_iterator); - } - } - } - - /*! - @brief comparison: less than or equal - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - bool operator<=(const iter_impl& other) const - { - return not other.operator < (*this); - } - - /*! - @brief comparison: greater than - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - bool operator>(const iter_impl& other) const - { - return not operator<=(other); - } - - /*! - @brief comparison: greater than or equal - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - bool operator>=(const iter_impl& other) const - { - return not operator<(other); - } - - /*! - @brief add to iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. 
- */ - iter_impl& operator+=(difference_type i) - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - JSON_THROW(invalid_iterator(209, "cannot use offsets with object iterators")); - } - - case basic_json::value_t::array: - { - std::advance(m_it.array_iterator, i); - break; - } - - default: - { - m_it.primitive_iterator += i; - break; - } - } - - return *this; - } - - /*! - @brief subtract from iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl& operator-=(difference_type i) - { - return operator+=(-i); - } - - /*! - @brief add to iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl operator+(difference_type i) - { - auto result = *this; - result += i; - return result; - } - - /*! - @brief subtract from iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl operator-(difference_type i) - { - auto result = *this; - result -= i; - return result; - } - - /*! - @brief return difference - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - difference_type operator-(const iter_impl& other) const - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - JSON_THROW(invalid_iterator(209, "cannot use offsets with object iterators")); - } - - case basic_json::value_t::array: - { - return m_it.array_iterator - other.m_it.array_iterator; - } - - default: - { - return m_it.primitive_iterator - other.m_it.primitive_iterator; - } - } - } - - /*! - @brief access to successor - @pre The iterator is initialized; i.e. `m_object != nullptr`. 
- */ - reference operator[](difference_type n) const - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - JSON_THROW(invalid_iterator(208, "cannot use operator[] for object iterators")); - } - - case basic_json::value_t::array: - { - return *std::next(m_it.array_iterator, n); - } - - case basic_json::value_t::null: - { - JSON_THROW(invalid_iterator(214, "cannot get value")); - } - - default: - { - if (m_it.primitive_iterator.get_value() == -n) - { - return *m_object; - } - - JSON_THROW(invalid_iterator(214, "cannot get value")); - } - } - } - - /*! - @brief return the key of an object iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - typename object_t::key_type key() const - { - assert(m_object != nullptr); - - if (m_object->is_object()) - { - return m_it.object_iterator->first; - } - - JSON_THROW(invalid_iterator(207, "cannot use key() for non-object iterators")); - } - - /*! - @brief return the value of an iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - reference value() const - { - return operator*(); - } - - private: - /// associated JSON instance - pointer m_object = nullptr; - /// the actual iterator of the associated instance - internal_iterator m_it = internal_iterator(); - }; - - /*! - @brief a template for a reverse iterator class - - @tparam Base the base iterator type to reverse. Valid types are @ref - iterator (to create @ref reverse_iterator) and @ref const_iterator (to - create @ref const_reverse_iterator). - - @requirement The class satisfies the following concept requirements: - - [RandomAccessIterator](http://en.cppreference.com/w/cpp/concept/RandomAccessIterator): - The iterator that can be moved to point (forward and backward) to any - element in constant time. - - [OutputIterator](http://en.cppreference.com/w/cpp/concept/OutputIterator): - It is possible to write to the pointed-to element (only if @a Base is - @ref iterator). 
- - @since version 1.0.0 - */ - template - class json_reverse_iterator : public std::reverse_iterator - { - public: - /// shortcut to the reverse iterator adaptor - using base_iterator = std::reverse_iterator; - /// the reference type for the pointed-to element - using reference = typename Base::reference; - - /// create reverse iterator from iterator - json_reverse_iterator(const typename base_iterator::iterator_type& it) noexcept - : base_iterator(it) - {} - - /// create reverse iterator from base class - json_reverse_iterator(const base_iterator& it) noexcept - : base_iterator(it) - {} - - /// post-increment (it++) - json_reverse_iterator operator++(int) - { - return base_iterator::operator++(1); - } - - /// pre-increment (++it) - json_reverse_iterator& operator++() - { - base_iterator::operator++(); - return *this; - } - - /// post-decrement (it--) - json_reverse_iterator operator--(int) - { - return base_iterator::operator--(1); - } - - /// pre-decrement (--it) - json_reverse_iterator& operator--() - { - base_iterator::operator--(); - return *this; - } - - /// add to iterator - json_reverse_iterator& operator+=(difference_type i) - { - base_iterator::operator+=(i); - return *this; - } - - /// add to iterator - json_reverse_iterator operator+(difference_type i) const - { - auto result = *this; - result += i; - return result; - } - - /// subtract from iterator - json_reverse_iterator operator-(difference_type i) const - { - auto result = *this; - result -= i; - return result; - } - - /// return difference - difference_type operator-(const json_reverse_iterator& other) const - { - return this->base() - other.base(); - } - - /// access to successor - reference operator[](difference_type n) const - { - return *(this->operator+(n)); - } - - /// return the key of an object iterator - typename object_t::key_type key() const - { - auto it = --this->base(); - return it.key(); - } - - /// return the value of an iterator - reference value() const - { - auto it = 
--this->base(); - return it.operator * (); - } - }; - - - private: - ////////////////////// - // lexer and parser // - ////////////////////// - - /*! - @brief lexical analysis - - This class organizes the lexical analysis during JSON deserialization. The - core of it is a scanner generated by [re2c](http://re2c.org) that - processes a buffer and recognizes tokens according to RFC 7159. - */ - class lexer - { - public: - /// token types for the parser - enum class token_type - { - uninitialized, ///< indicating the scanner is uninitialized - literal_true, ///< the `true` literal - literal_false, ///< the `false` literal - literal_null, ///< the `null` literal - value_string, ///< a string -- use get_string() for actual value - value_unsigned, ///< an unsigned integer -- use get_number() for actual value - value_integer, ///< a signed integer -- use get_number() for actual value - value_float, ///< an floating point number -- use get_number() for actual value - begin_array, ///< the character for array begin `[` - begin_object, ///< the character for object begin `{` - end_array, ///< the character for array end `]` - end_object, ///< the character for object end `}` - name_separator, ///< the name separator `:` - value_separator, ///< the value separator `,` - parse_error, ///< indicating a parse error - end_of_input ///< indicating the end of the input buffer - }; - - /// the char type to use in the lexer - using lexer_char_t = unsigned char; - - /// a lexer from a buffer with given length - lexer(const lexer_char_t* buff, const size_t len) noexcept - : m_content(buff) - { - assert(m_content != nullptr); - m_start = m_cursor = m_content; - m_limit = m_content + len; - } - - /*! 
- @brief a lexer from an input stream - @throw parse_error.111 if input stream is in a bad state - */ - explicit lexer(std::istream& s) - : m_stream(&s), m_line_buffer() - { - // immediately abort if stream is erroneous - if (s.fail()) - { - JSON_THROW(parse_error(111, 0, "bad input stream")); - } - - // fill buffer - fill_line_buffer(); - - // skip UTF-8 byte-order mark - if (m_line_buffer.size() >= 3 and m_line_buffer.substr(0, 3) == "\xEF\xBB\xBF") - { - m_line_buffer[0] = ' '; - m_line_buffer[1] = ' '; - m_line_buffer[2] = ' '; - } - } - - // switch off unwanted functions (due to pointer members) - lexer() = delete; - lexer(const lexer&) = delete; - lexer operator=(const lexer&) = delete; - - /*! - @brief create a string from one or two Unicode code points - - There are two cases: (1) @a codepoint1 is in the Basic Multilingual - Plane (U+0000 through U+FFFF) and @a codepoint2 is 0, or (2) - @a codepoint1 and @a codepoint2 are a UTF-16 surrogate pair to - represent a code point above U+FFFF. - - @param[in] codepoint1 the code point (can be high surrogate) - @param[in] codepoint2 the code point (can be low surrogate or 0) - - @return string representation of the code point; the length of the - result string is between 1 and 4 characters. - - @throw parse_error.102 if the low surrogate is invalid; example: - `""missing or wrong low surrogate""` - @throw parse_error.103 if code point is > 0x10ffff; example: `"code - points above 0x10FFFF are invalid"` - - @complexity Constant. 
- - @see - */ - string_t to_unicode(const std::size_t codepoint1, - const std::size_t codepoint2 = 0) const - { - // calculate the code point from the given code points - std::size_t codepoint = codepoint1; - - // check if codepoint1 is a high surrogate - if (codepoint1 >= 0xD800 and codepoint1 <= 0xDBFF) - { - // check if codepoint2 is a low surrogate - if (codepoint2 >= 0xDC00 and codepoint2 <= 0xDFFF) - { - codepoint = - // high surrogate occupies the most significant 22 bits - (codepoint1 << 10) - // low surrogate occupies the least significant 15 bits - + codepoint2 - // there is still the 0xD800, 0xDC00 and 0x10000 noise - // in the result so we have to subtract with: - // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00 - - 0x35FDC00; - } - else - { - JSON_THROW(parse_error(102, get_position(), "missing or wrong low surrogate")); - } - } - - string_t result; - - if (codepoint < 0x80) - { - // 1-byte characters: 0xxxxxxx (ASCII) - result.append(1, static_cast(codepoint)); - } - else if (codepoint <= 0x7ff) - { - // 2-byte characters: 110xxxxx 10xxxxxx - result.append(1, static_cast(0xC0 | ((codepoint >> 6) & 0x1F))); - result.append(1, static_cast(0x80 | (codepoint & 0x3F))); - } - else if (codepoint <= 0xffff) - { - // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx - result.append(1, static_cast(0xE0 | ((codepoint >> 12) & 0x0F))); - result.append(1, static_cast(0x80 | ((codepoint >> 6) & 0x3F))); - result.append(1, static_cast(0x80 | (codepoint & 0x3F))); - } - else if (codepoint <= 0x10ffff) - { - // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - result.append(1, static_cast(0xF0 | ((codepoint >> 18) & 0x07))); - result.append(1, static_cast(0x80 | ((codepoint >> 12) & 0x3F))); - result.append(1, static_cast(0x80 | ((codepoint >> 6) & 0x3F))); - result.append(1, static_cast(0x80 | (codepoint & 0x3F))); - } - else - { - JSON_THROW(parse_error(103, get_position(), "code points above 0x10FFFF are invalid")); - } - - return result; - } - - /// return name 
of values of type token_type (only used for errors) - static std::string token_type_name(const token_type t) - { - switch (t) - { - case token_type::uninitialized: - return ""; - case token_type::literal_true: - return "true literal"; - case token_type::literal_false: - return "false literal"; - case token_type::literal_null: - return "null literal"; - case token_type::value_string: - return "string literal"; - case lexer::token_type::value_unsigned: - case lexer::token_type::value_integer: - case lexer::token_type::value_float: - return "number literal"; - case token_type::begin_array: - return "'['"; - case token_type::begin_object: - return "'{'"; - case token_type::end_array: - return "']'"; - case token_type::end_object: - return "'}'"; - case token_type::name_separator: - return "':'"; - case token_type::value_separator: - return "','"; - case token_type::parse_error: - return ""; - case token_type::end_of_input: - return "end of input"; - default: - { - // catch non-enum values - return "unknown token"; // LCOV_EXCL_LINE - } - } - } - - /*! - This function implements a scanner for JSON. It is specified using - regular expressions that try to follow RFC 7159 as close as possible. - These regular expressions are then translated into a minimized - deterministic finite automaton (DFA) by the tool - [re2c](http://re2c.org). As a result, the translated code for this - function consists of a large block of code with `goto` jumps. - - @return the class of the next token read from the buffer - - @complexity Linear in the length of the input.\n - - Proposition: The loop below will always terminate for finite input.\n - - Proof (by contradiction): Assume a finite input. To loop forever, the - loop must never hit code with a `break` statement. The only code - snippets without a `break` statement is the continue statement for - whitespace. To loop forever, the input must be an infinite sequence - whitespace. This contradicts the assumption of finite input, q.e.d. 
- */ - token_type scan() - { - while (true) - { - // pointer for backtracking information - m_marker = nullptr; - - // remember the begin of the token - m_start = m_cursor; - assert(m_start != nullptr); - - /*!re2c - re2c:define:YYCTYPE = lexer_char_t; - re2c:define:YYCURSOR = m_cursor; - re2c:define:YYLIMIT = m_limit; - re2c:define:YYMARKER = m_marker; - re2c:define:YYFILL = "fill_line_buffer(@@); // LCOV_EXCL_LINE"; - re2c:define:YYFILL:naked = 1; - re2c:yyfill:enable = 1; - re2c:indent:string = " "; - re2c:indent:top = 1; - re2c:labelprefix = "basic_json_parser_"; - - // ignore whitespace - ws = [ \t\n\r]+; - ws { position += static_cast((m_cursor - m_start)); continue; } - - // structural characters - "[" { last_token_type = token_type::begin_array; break; } - "]" { last_token_type = token_type::end_array; break; } - "{" { last_token_type = token_type::begin_object; break; } - "}" { last_token_type = token_type::end_object; break; } - "," { last_token_type = token_type::value_separator; break; } - ":" { last_token_type = token_type::name_separator; break; } - - // literal names - "null" { last_token_type = token_type::literal_null; break; } - "true" { last_token_type = token_type::literal_true; break; } - "false" { last_token_type = token_type::literal_false; break; } - - // number - decimal_point = "."; - digit = [0-9]; - digit_1_9 = [1-9]; - e = "e" | "E"; - minus = "-"; - plus = "+"; - zero = "0"; - exp = e (minus | plus)? digit+; - frac = decimal_point digit+; - int = (zero | digit_1_9 digit*); - invalid_int = minus? "0" digit+; - invalid_int { last_token_type = token_type::parse_error; break; } - number_unsigned = int; - number_unsigned { last_token_type = token_type::value_unsigned; break; } - number_integer = minus int; - number_integer { last_token_type = token_type::value_integer; break; } - number_float = minus? int frac? 
exp?; - number_float { last_token_type = token_type::value_float; break; } - - // string - quotation_mark = "\""; - escape = "\\"; - unescaped = [^"\\\x00-\x1f]; - single_escaped = "\"" | "\\" | "/" | "b" | "f" | "n" | "r" | "t"; - unicode_escaped = "u" [0-9a-fA-F]{4}; - escaped = escape (single_escaped | unicode_escaped); - char = unescaped | escaped; - string = quotation_mark char* quotation_mark; - string { last_token_type = token_type::value_string; break; } - - // end of file - "\x00" { last_token_type = token_type::end_of_input; break; } - - // anything else is an error - * { last_token_type = token_type::parse_error; break; } - */ - } - - position += static_cast((m_cursor - m_start)); - return last_token_type; - } - - /*! - @brief append data from the stream to the line buffer - - This function is called by the scan() function when the end of the - buffer (`m_limit`) is reached and the `m_cursor` pointer cannot be - incremented without leaving the limits of the line buffer. Note re2c - decides when to call this function. - - If the lexer reads from contiguous storage, there is no trailing null - byte. Therefore, this function must make sure to add these padding - null bytes. - - If the lexer reads from an input stream, this function reads the next - line of the input. - - @pre - p p p p p p u u u u u x . . . . . . - ^ ^ ^ ^ - m_content m_start | m_limit - m_cursor - - @post - u u u u u x x x x x x x . . . . . . 
- ^ ^ ^ - | m_cursor m_limit - m_start - m_content - */ - void fill_line_buffer(size_t n = 0) - { - // if line buffer is used, m_content points to its data - assert(m_line_buffer.empty() - or m_content == reinterpret_cast(m_line_buffer.data())); - - // if line buffer is used, m_limit is set past the end of its data - assert(m_line_buffer.empty() - or m_limit == m_content + m_line_buffer.size()); - - // pointer relationships - assert(m_content <= m_start); - assert(m_start <= m_cursor); - assert(m_cursor <= m_limit); - assert(m_marker == nullptr or m_marker <= m_limit); - - // number of processed characters (p) - const auto num_processed_chars = static_cast(m_start - m_content); - // offset for m_marker wrt. to m_start - const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start; - // number of unprocessed characters (u) - const auto offset_cursor = m_cursor - m_start; - - // no stream is used or end of file is reached - if (m_stream == nullptr or m_stream->eof()) - { - // m_start may or may not be pointing into m_line_buffer at - // this point. We trust the standard library to do the right - // thing. 
See http://stackoverflow.com/q/28142011/266378 - m_line_buffer.assign(m_start, m_limit); - - // append n characters to make sure that there is sufficient - // space between m_cursor and m_limit - m_line_buffer.append(1, '\x00'); - if (n > 0) - { - m_line_buffer.append(n - 1, '\x01'); - } - } - else - { - // delete processed characters from line buffer - m_line_buffer.erase(0, num_processed_chars); - // read next line from input stream - m_line_buffer_tmp.clear(); - - // check if stream is still good - if (m_stream->fail()) - { - JSON_THROW(parse_error(111, 0, "bad input stream")); - } - - std::getline(*m_stream, m_line_buffer_tmp, '\n'); - - // add line with newline symbol to the line buffer - m_line_buffer += m_line_buffer_tmp; - m_line_buffer.push_back('\n'); - } - - // set pointers - m_content = reinterpret_cast(m_line_buffer.data()); - assert(m_content != nullptr); - m_start = m_content; - m_marker = m_start + offset_marker; - m_cursor = m_start + offset_cursor; - m_limit = m_start + m_line_buffer.size(); - } - - /// return string representation of last read token - string_t get_token_string() const - { - assert(m_start != nullptr); - return string_t(reinterpret_cast(m_start), - static_cast(m_cursor - m_start)); - } - - /*! - @brief return string value for string tokens - - The function iterates the characters between the opening and closing - quotes of the string value. The complete string is the range - [m_start,m_cursor). Consequently, we iterate from m_start+1 to - m_cursor-1. - - We differentiate two cases: - - 1. Escaped characters. In this case, a new character is constructed - according to the nature of the escape. Some escapes create new - characters (e.g., `"\\n"` is replaced by `"\n"`), some are copied - as is (e.g., `"\\\\"`). Furthermore, Unicode escapes of the shape - `"\\uxxxx"` need special care. In this case, to_unicode takes care - of the construction of the values. - 2. Unescaped characters are copied as is. 
- - @pre `m_cursor - m_start >= 2`, meaning the length of the last token - is at least 2 bytes which is trivially true for any string (which - consists of at least two quotes). - - " c1 c2 c3 ... " - ^ ^ - m_start m_cursor - - @complexity Linear in the length of the string.\n - - Lemma: The loop body will always terminate.\n - - Proof (by contradiction): Assume the loop body does not terminate. As - the loop body does not contain another loop, one of the called - functions must never return. The called functions are `std::strtoul` - and to_unicode. Neither function can loop forever, so the loop body - will never loop forever which contradicts the assumption that the loop - body does not terminate, q.e.d.\n - - Lemma: The loop condition for the for loop is eventually false.\n - - Proof (by contradiction): Assume the loop does not terminate. Due to - the above lemma, this can only be due to a tautological loop - condition; that is, the loop condition i < m_cursor - 1 must always be - true. Let x be the change of i for any loop iteration. Then - m_start + 1 + x < m_cursor - 1 must hold to loop indefinitely. This - can be rephrased to m_cursor - m_start - 2 > x. With the - precondition, we x <= 0, meaning that the loop condition holds - indefinitely if i is always decreased. However, observe that the value - of i is strictly increasing with each iteration, as it is incremented - by 1 in the iteration expression and never decremented inside the loop - body. Hence, the loop condition will eventually be false which - contradicts the assumption that the loop condition is a tautology, - q.e.d. 
- - @return string value of current token without opening and closing - quotes - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - */ - string_t get_string() const - { - assert(m_cursor - m_start >= 2); - - string_t result; - result.reserve(static_cast(m_cursor - m_start - 2)); - - // iterate the result between the quotes - for (const lexer_char_t* i = m_start + 1; i < m_cursor - 1; ++i) - { - // find next escape character - auto e = std::find(i, m_cursor - 1, '\\'); - if (e != i) - { - // see https://github.com/nlohmann/json/issues/365#issuecomment-262874705 - for (auto k = i; k < e; k++) - { - result.push_back(static_cast(*k)); - } - i = e - 1; // -1 because of ++i - } - else - { - // processing escaped character - // read next character - ++i; - - switch (*i) - { - // the default escapes - case 't': - { - result += "\t"; - break; - } - case 'b': - { - result += "\b"; - break; - } - case 'f': - { - result += "\f"; - break; - } - case 'n': - { - result += "\n"; - break; - } - case 'r': - { - result += "\r"; - break; - } - case '\\': - { - result += "\\"; - break; - } - case '/': - { - result += "/"; - break; - } - case '"': - { - result += "\""; - break; - } - - // unicode - case 'u': - { - // get code xxxx from uxxxx - auto codepoint = std::strtoul(std::string(reinterpret_cast(i + 1), - 4).c_str(), nullptr, 16); - - // check if codepoint is a high surrogate - if (codepoint >= 0xD800 and codepoint <= 0xDBFF) - { - // make sure there is a subsequent unicode - if ((i + 6 >= m_limit) or * (i + 5) != '\\' or * (i + 6) != 'u') - { - JSON_THROW(parse_error(102, get_position(), "missing low surrogate")); - } - - // get code yyyy from uxxxx\uyyyy - auto codepoint2 = std::strtoul(std::string(reinterpret_cast - (i + 7), 4).c_str(), nullptr, 16); - result += to_unicode(codepoint, codepoint2); - // skip the next 10 characters (xxxx\uyyyy) - i += 10; - } - else if (codepoint >= 0xDC00 and codepoint <= 0xDFFF) - { - // 
we found a lone low surrogate - JSON_THROW(parse_error(102, get_position(), "missing high surrogate")); - } - else - { - // add unicode character(s) - result += to_unicode(codepoint); - // skip the next four characters (xxxx) - i += 4; - } - break; - } - } - } - } - - return result; - } - - - /*! - @brief parse string into a built-in arithmetic type as if the current - locale is POSIX. - - @note in floating-point case strtod may parse past the token's end - - this is not an error - - @note any leading blanks are not handled - */ - struct strtonum - { - public: - strtonum(const char* start, const char* end) - : m_start(start), m_end(end) - {} - - /*! - @return true iff parsed successfully as number of type T - - @param[in,out] val shall contain parsed value, or undefined value - if could not parse - */ - template::value>::type> - bool to(T& val) const - { - return parse(val, std::is_integral()); - } - - private: - const char* const m_start = nullptr; - const char* const m_end = nullptr; - - // floating-point conversion - - // overloaded wrappers for strtod/strtof/strtold - // that will be called from parse - static void strtof(float& f, const char* str, char** endptr) - { - f = std::strtof(str, endptr); - } - - static void strtof(double& f, const char* str, char** endptr) - { - f = std::strtod(str, endptr); - } - - static void strtof(long double& f, const char* str, char** endptr) - { - f = std::strtold(str, endptr); - } - - template - bool parse(T& value, /*is_integral=*/std::false_type) const - { - // replace decimal separator with locale-specific version, - // when necessary; data will point to either the original - // string, or buf, or tempstr containing the fixed string. 
- std::string tempstr; - std::array buf; - const size_t len = static_cast(m_end - m_start); - - // lexer will reject empty numbers - assert(len > 0); - - // since dealing with strtod family of functions, we're - // getting the decimal point char from the C locale facilities - // instead of C++'s numpunct facet of the current std::locale - const auto loc = localeconv(); - assert(loc != nullptr); - const char decimal_point_char = (loc->decimal_point == nullptr) ? '.' : loc->decimal_point[0]; - - const char* data = m_start; - - if (decimal_point_char != '.') - { - const size_t ds_pos = static_cast(std::find(m_start, m_end, '.') - m_start); - - if (ds_pos != len) - { - // copy the data into the local buffer or tempstr, if - // buffer is too small; replace decimal separator, and - // update data to point to the modified bytes - if ((len + 1) < buf.size()) - { - std::copy(m_start, m_end, buf.begin()); - buf[len] = 0; - buf[ds_pos] = decimal_point_char; - data = buf.data(); - } - else - { - tempstr.assign(m_start, m_end); - tempstr[ds_pos] = decimal_point_char; - data = tempstr.c_str(); - } - } - } - - char* endptr = nullptr; - value = 0; - // this calls appropriate overload depending on T - strtof(value, data, &endptr); - - // parsing was successful iff strtof parsed exactly the number - // of characters determined by the lexer (len) - const bool ok = (endptr == (data + len)); - - if (ok and (value == static_cast(0.0)) and (*data == '-')) - { - // some implementations forget to negate the zero - value = -0.0; - } - - return ok; - } - - // integral conversion - - signed long long parse_integral(char** endptr, /*is_signed*/std::true_type) const - { - return std::strtoll(m_start, endptr, 10); - } - - unsigned long long parse_integral(char** endptr, /*is_signed*/std::false_type) const - { - return std::strtoull(m_start, endptr, 10); - } - - template - bool parse(T& value, /*is_integral=*/std::true_type) const - { - char* endptr = nullptr; - errno = 0; // these are 
thread-local - const auto x = parse_integral(&endptr, std::is_signed()); - - // called right overload? - static_assert(std::is_signed() == std::is_signed(), ""); - - value = static_cast(x); - - return (x == static_cast(value)) // x fits into destination T - and (x < 0) == (value < 0) // preserved sign - //and ((x != 0) or is_integral()) // strto[u]ll did nto fail - and (errno == 0) // strto[u]ll did not overflow - and (m_start < m_end) // token was not empty - and (endptr == m_end); // parsed entire token exactly - } - }; - - /*! - @brief return number value for number tokens - - This function translates the last token into the most appropriate - number type (either integer, unsigned integer or floating point), - which is passed back to the caller via the result parameter. - - integral numbers that don't fit into the the range of the respective - type are parsed as number_float_t - - floating-point values do not satisfy std::isfinite predicate - are converted to value_t::null - - throws if the entire string [m_start .. m_cursor) cannot be - interpreted as a number - - @param[out] result @ref basic_json object to receive the number. 
- @param[in] token the type of the number token - */ - bool get_number(basic_json& result, const token_type token) const - { - assert(m_start != nullptr); - assert(m_start < m_cursor); - assert((token == token_type::value_unsigned) or - (token == token_type::value_integer) or - (token == token_type::value_float)); - - strtonum num_converter(reinterpret_cast(m_start), - reinterpret_cast(m_cursor)); - - switch (token) - { - case lexer::token_type::value_unsigned: - { - number_unsigned_t val; - if (num_converter.to(val)) - { - // parsing successful - result.m_type = value_t::number_unsigned; - result.m_value = val; - return true; - } - break; - } - - case lexer::token_type::value_integer: - { - number_integer_t val; - if (num_converter.to(val)) - { - // parsing successful - result.m_type = value_t::number_integer; - result.m_value = val; - return true; - } - break; - } - - default: - { - break; - } - } - - // parse float (either explicitly or because a previous conversion - // failed) - number_float_t val; - if (num_converter.to(val)) - { - // parsing successful - result.m_type = value_t::number_float; - result.m_value = val; - - // throw in case of infinity or NAN - if (not std::isfinite(result.m_value.number_float)) - { - JSON_THROW(out_of_range(406, "number overflow parsing '" + get_token_string() + "'")); - } - - return true; - } - - // couldn't parse number in any format - return false; - } - - constexpr size_t get_position() const - { - return position; - } - - private: - /// optional input stream - std::istream* m_stream = nullptr; - /// line buffer buffer for m_stream - string_t m_line_buffer {}; - /// used for filling m_line_buffer - string_t m_line_buffer_tmp {}; - /// the buffer pointer - const lexer_char_t* m_content = nullptr; - /// pointer to the beginning of the current symbol - const lexer_char_t* m_start = nullptr; - /// pointer for backtracking information - const lexer_char_t* m_marker = nullptr; - /// pointer to the current symbol - const 
lexer_char_t* m_cursor = nullptr; - /// pointer to the end of the buffer - const lexer_char_t* m_limit = nullptr; - /// the last token type - token_type last_token_type = token_type::end_of_input; - /// current position in the input (read bytes) - size_t position = 0; - }; - - /*! - @brief syntax analysis - - This class implements a recursive decent parser. - */ - class parser - { - public: - /// a parser reading from a string literal - parser(const char* buff, const parser_callback_t cb = nullptr) - : callback(cb), - m_lexer(reinterpret_cast(buff), std::strlen(buff)) - {} - - /*! - @brief a parser reading from an input stream - @throw parse_error.111 if input stream is in a bad state - */ - parser(std::istream& is, const parser_callback_t cb = nullptr) - : callback(cb), m_lexer(is) - {} - - /// a parser reading from an iterator range with contiguous storage - template::iterator_category, std::random_access_iterator_tag>::value - , int>::type - = 0> - parser(IteratorType first, IteratorType last, const parser_callback_t cb = nullptr) - : callback(cb), - m_lexer(reinterpret_cast(&(*first)), - static_cast(std::distance(first, last))) - {} - - /*! - @brief public parser interface - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - */ - basic_json parse() - { - // read first token - get_token(); - - basic_json result = parse_internal(true); - result.assert_invariant(); - - expect(lexer::token_type::end_of_input); - - // return parser result and replace it with null in case the - // top-level value was discarded by the callback function - return result.is_discarded() ? basic_json() : std::move(result); - } - - private: - /*! 
- @brief the actual parser - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - */ - basic_json parse_internal(bool keep) - { - auto result = basic_json(value_t::discarded); - - switch (last_token) - { - case lexer::token_type::begin_object: - { - if (keep and (not callback - or ((keep = callback(depth++, parse_event_t::object_start, result)) != 0))) - { - // explicitly set result to object to cope with {} - result.m_type = value_t::object; - result.m_value = value_t::object; - } - - // read next token - get_token(); - - // closing } -> we are done - if (last_token == lexer::token_type::end_object) - { - get_token(); - if (keep and callback and not callback(--depth, parse_event_t::object_end, result)) - { - result = basic_json(value_t::discarded); - } - return result; - } - - // no comma is expected here - unexpect(lexer::token_type::value_separator); - - // otherwise: parse key-value pairs - do - { - // ugly, but could be fixed with loop reorganization - if (last_token == lexer::token_type::value_separator) - { - get_token(); - } - - // store key - expect(lexer::token_type::value_string); - const auto key = m_lexer.get_string(); - - bool keep_tag = false; - if (keep) - { - if (callback) - { - basic_json k(key); - keep_tag = callback(depth, parse_event_t::key, k); - } - else - { - keep_tag = true; - } - } - - // parse separator (:) - get_token(); - expect(lexer::token_type::name_separator); - - // parse and add value - get_token(); - auto value = parse_internal(keep); - if (keep and keep_tag and not value.is_discarded()) - { - result[key] = std::move(value); - } - } - while (last_token == lexer::token_type::value_separator); - - // closing } - expect(lexer::token_type::end_object); - get_token(); - if (keep and callback and not callback(--depth, parse_event_t::object_end, result)) - { - result = basic_json(value_t::discarded); - } - - return result; - } 
- - case lexer::token_type::begin_array: - { - if (keep and (not callback - or ((keep = callback(depth++, parse_event_t::array_start, result)) != 0))) - { - // explicitly set result to object to cope with [] - result.m_type = value_t::array; - result.m_value = value_t::array; - } - - // read next token - get_token(); - - // closing ] -> we are done - if (last_token == lexer::token_type::end_array) - { - get_token(); - if (callback and not callback(--depth, parse_event_t::array_end, result)) - { - result = basic_json(value_t::discarded); - } - return result; - } - - // no comma is expected here - unexpect(lexer::token_type::value_separator); - - // otherwise: parse values - do - { - // ugly, but could be fixed with loop reorganization - if (last_token == lexer::token_type::value_separator) - { - get_token(); - } - - // parse value - auto value = parse_internal(keep); - if (keep and not value.is_discarded()) - { - result.push_back(std::move(value)); - } - } - while (last_token == lexer::token_type::value_separator); - - // closing ] - expect(lexer::token_type::end_array); - get_token(); - if (keep and callback and not callback(--depth, parse_event_t::array_end, result)) - { - result = basic_json(value_t::discarded); - } - - return result; - } - - case lexer::token_type::literal_null: - { - get_token(); - result.m_type = value_t::null; - break; - } - - case lexer::token_type::value_string: - { - const auto s = m_lexer.get_string(); - get_token(); - result = basic_json(s); - break; - } - - case lexer::token_type::literal_true: - { - get_token(); - result.m_type = value_t::boolean; - result.m_value = true; - break; - } - - case lexer::token_type::literal_false: - { - get_token(); - result.m_type = value_t::boolean; - result.m_value = false; - break; - } - - case lexer::token_type::value_unsigned: - case lexer::token_type::value_integer: - case lexer::token_type::value_float: - { - m_lexer.get_number(result, last_token); - get_token(); - break; - } - - default: - { - // 
the last token was unexpected - unexpect(last_token); - } - } - - if (keep and callback and not callback(depth, parse_event_t::value, result)) - { - result = basic_json(value_t::discarded); - } - return result; - } - - /// get next token from lexer - typename lexer::token_type get_token() - { - last_token = m_lexer.scan(); - return last_token; - } - - /*! - @throw parse_error.101 if expected token did not occur - */ - void expect(typename lexer::token_type t) const - { - if (t != last_token) - { - std::string error_msg = "parse error - unexpected "; - error_msg += (last_token == lexer::token_type::parse_error ? ("'" + m_lexer.get_token_string() + - "'") : - lexer::token_type_name(last_token)); - error_msg += "; expected " + lexer::token_type_name(t); - JSON_THROW(parse_error(101, m_lexer.get_position(), error_msg)); - } - } - - /*! - @throw parse_error.101 if unexpected token occurred - */ - void unexpect(typename lexer::token_type t) const - { - if (t == last_token) - { - std::string error_msg = "parse error - unexpected "; - error_msg += (last_token == lexer::token_type::parse_error ? ("'" + m_lexer.get_token_string() + - "'") : - lexer::token_type_name(last_token)); - JSON_THROW(parse_error(101, m_lexer.get_position(), error_msg)); - } - } - - private: - /// current level of recursion - int depth = 0; - /// callback function - const parser_callback_t callback = nullptr; - /// the type of the last read token - typename lexer::token_type last_token = lexer::token_type::uninitialized; - /// the lexer - lexer m_lexer; - }; - - public: - /*! - @brief JSON Pointer - - A JSON pointer defines a string syntax for identifying a specific value - within a JSON document. It can be used with functions `at` and - `operator[]`. Furthermore, JSON pointers are the base for JSON patches. 
- - @sa [RFC 6901](https://tools.ietf.org/html/rfc6901) - - @since version 2.0.0 - */ - class json_pointer - { - /// allow basic_json to access private members - friend class basic_json; - - public: - /*! - @brief create JSON pointer - - Create a JSON pointer according to the syntax described in - [Section 3 of RFC6901](https://tools.ietf.org/html/rfc6901#section-3). - - @param[in] s string representing the JSON pointer; if omitted, the - empty string is assumed which references the whole JSON - value - - @throw parse_error.107 if the given JSON pointer @a s is nonempty and - does not begin with a slash (`/`); see example below - - @throw parse_error.108 if a tilde (`~`) in the given JSON pointer @a s - is not followed by `0` (representing `~`) or `1` (representing `/`); - see example below - - @liveexample{The example shows the construction several valid JSON - pointers as well as the exceptional behavior.,json_pointer} - - @since version 2.0.0 - */ - explicit json_pointer(const std::string& s = "") - : reference_tokens(split(s)) - {} - - /*! - @brief return a string representation of the JSON pointer - - @invariant For each JSON pointer `ptr`, it holds: - @code {.cpp} - ptr == json_pointer(ptr.to_string()); - @endcode - - @return a string representation of the JSON pointer - - @liveexample{The example shows the result of `to_string`., - json_pointer__to_string} - - @since version 2.0.0 - */ - std::string to_string() const noexcept - { - return std::accumulate(reference_tokens.begin(), - reference_tokens.end(), std::string{}, - [](const std::string & a, const std::string & b) - { - return a + "/" + escape(b); - }); - } - - /// @copydoc to_string() - operator std::string() const - { - return to_string(); - } - - private: - /*! 
- @brief remove and return last reference pointer - @throw out_of_range.405 if JSON pointer has no parent - */ - std::string pop_back() - { - if (is_root()) - { - JSON_THROW(out_of_range(405, "JSON pointer has no parent")); - } - - auto last = reference_tokens.back(); - reference_tokens.pop_back(); - return last; - } - - /// return whether pointer points to the root document - bool is_root() const - { - return reference_tokens.empty(); - } - - json_pointer top() const - { - if (is_root()) - { - JSON_THROW(out_of_range(405, "JSON pointer has no parent")); - } - - json_pointer result = *this; - result.reference_tokens = {reference_tokens[0]}; - return result; - } - - /*! - @brief create and return a reference to the pointed to value - - @complexity Linear in the number of reference tokens. - - @throw parse_error.109 if array index is not a number - @throw type_error.313 if value cannot be unflattened - */ - reference get_and_create(reference j) const - { - pointer result = &j; - - // in case no reference tokens exist, return a reference to the - // JSON value j which will be overwritten by a primitive value - for (const auto& reference_token : reference_tokens) - { - switch (result->m_type) - { - case value_t::null: - { - if (reference_token == "0") - { - // start a new array if reference token is 0 - result = &result->operator[](0); - } - else - { - // start a new object otherwise - result = &result->operator[](reference_token); - } - break; - } - - case value_t::object: - { - // create an entry in the object - result = &result->operator[](reference_token); - break; - } - - case value_t::array: - { - // create an entry in the array - JSON_TRY - { - result = &result->operator[](static_cast(std::stoi(reference_token))); - } - JSON_CATCH(std::invalid_argument&) - { - JSON_THROW(parse_error(109, 0, "array index '" + reference_token + "' is not a number")); - } - break; - } - - /* - The following code is only reached if there exists a - reference token _and_ the current 
value is primitive. In - this case, we have an error situation, because primitive - values may only occur as single value; that is, with an - empty list of reference tokens. - */ - default: - { - JSON_THROW(type_error(313, "invalid value to unflatten")); - } - } - } - - return *result; - } - - /*! - @brief return a reference to the pointed to value - - @note This version does not throw if a value is not present, but tries - to create nested values instead. For instance, calling this function - with pointer `"/this/that"` on a null value is equivalent to calling - `operator[]("this").operator[]("that")` on that value, effectively - changing the null value to an object. - - @param[in] ptr a JSON value - - @return reference to the JSON value pointed to by the JSON pointer - - @complexity Linear in the length of the JSON pointer. - - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - @throw out_of_range.404 if the JSON pointer can not be resolved - */ - reference get_unchecked(pointer ptr) const - { - for (const auto& reference_token : reference_tokens) - { - // convert null values to arrays or objects before continuing - if (ptr->m_type == value_t::null) - { - // check if reference token is a number - const bool nums = std::all_of(reference_token.begin(), - reference_token.end(), - [](const char x) - { - return (x >= '0' and x <= '9'); - }); - - // change value to array for numbers or "-" or to object - // otherwise - if (nums or reference_token == "-") - { - *ptr = value_t::array; - } - else - { - *ptr = value_t::object; - } - } - - switch (ptr->m_type) - { - case value_t::object: - { - // use unchecked object access - ptr = &ptr->operator[](reference_token); - break; - } - - case value_t::array: - { - // error condition (cf. RFC 6901, Sect. 
4) - if (reference_token.size() > 1 and reference_token[0] == '0') - { - JSON_THROW(parse_error(106, 0, "array index '" + reference_token + "' must not begin with '0'")); - } - - if (reference_token == "-") - { - // explicitly treat "-" as index beyond the end - ptr = &ptr->operator[](ptr->m_value.array->size()); - } - else - { - // convert array index to number; unchecked access - JSON_TRY - { - ptr = &ptr->operator[](static_cast(std::stoi(reference_token))); - } - JSON_CATCH(std::invalid_argument&) - { - JSON_THROW(parse_error(109, 0, "array index '" + reference_token + "' is not a number")); - } - } - break; - } - - default: - { - JSON_THROW(out_of_range(404, "unresolved reference token '" + reference_token + "'")); - } - } - } - - return *ptr; - } - - /*! - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - @throw out_of_range.402 if the array index '-' is used - @throw out_of_range.404 if the JSON pointer can not be resolved - */ - reference get_checked(pointer ptr) const - { - for (const auto& reference_token : reference_tokens) - { - switch (ptr->m_type) - { - case value_t::object: - { - // note: at performs range check - ptr = &ptr->at(reference_token); - break; - } - - case value_t::array: - { - if (reference_token == "-") - { - // "-" always fails the range check - JSON_THROW(out_of_range(402, "array index '-' (" + - std::to_string(ptr->m_value.array->size()) + - ") is out of range")); - } - - // error condition (cf. RFC 6901, Sect. 
4) - if (reference_token.size() > 1 and reference_token[0] == '0') - { - JSON_THROW(parse_error(106, 0, "array index '" + reference_token + "' must not begin with '0'")); - } - - // note: at performs range check - JSON_TRY - { - ptr = &ptr->at(static_cast(std::stoi(reference_token))); - } - JSON_CATCH(std::invalid_argument&) - { - JSON_THROW(parse_error(109, 0, "array index '" + reference_token + "' is not a number")); - } - break; - } - - default: - { - JSON_THROW(out_of_range(404, "unresolved reference token '" + reference_token + "'")); - } - } - } - - return *ptr; - } - - /*! - @brief return a const reference to the pointed to value - - @param[in] ptr a JSON value - - @return const reference to the JSON value pointed to by the JSON - pointer - - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - @throw out_of_range.402 if the array index '-' is used - @throw out_of_range.404 if the JSON pointer can not be resolved - */ - const_reference get_unchecked(const_pointer ptr) const - { - for (const auto& reference_token : reference_tokens) - { - switch (ptr->m_type) - { - case value_t::object: - { - // use unchecked object access - ptr = &ptr->operator[](reference_token); - break; - } - - case value_t::array: - { - if (reference_token == "-") - { - // "-" cannot be used for const access - JSON_THROW(out_of_range(402, "array index '-' (" + - std::to_string(ptr->m_value.array->size()) + - ") is out of range")); - } - - // error condition (cf. RFC 6901, Sect. 
4) - if (reference_token.size() > 1 and reference_token[0] == '0') - { - JSON_THROW(parse_error(106, 0, "array index '" + reference_token + "' must not begin with '0'")); - } - - // use unchecked array access - JSON_TRY - { - ptr = &ptr->operator[](static_cast(std::stoi(reference_token))); - } - JSON_CATCH(std::invalid_argument&) - { - JSON_THROW(parse_error(109, 0, "array index '" + reference_token + "' is not a number")); - } - break; - } - - default: - { - JSON_THROW(out_of_range(404, "unresolved reference token '" + reference_token + "'")); - } - } - } - - return *ptr; - } - - /*! - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - @throw out_of_range.402 if the array index '-' is used - @throw out_of_range.404 if the JSON pointer can not be resolved - */ - const_reference get_checked(const_pointer ptr) const - { - for (const auto& reference_token : reference_tokens) - { - switch (ptr->m_type) - { - case value_t::object: - { - // note: at performs range check - ptr = &ptr->at(reference_token); - break; - } - - case value_t::array: - { - if (reference_token == "-") - { - // "-" always fails the range check - JSON_THROW(out_of_range(402, "array index '-' (" + - std::to_string(ptr->m_value.array->size()) + - ") is out of range")); - } - - // error condition (cf. RFC 6901, Sect. 4) - if (reference_token.size() > 1 and reference_token[0] == '0') - { - JSON_THROW(parse_error(106, 0, "array index '" + reference_token + "' must not begin with '0'")); - } - - // note: at performs range check - JSON_TRY - { - ptr = &ptr->at(static_cast(std::stoi(reference_token))); - } - JSON_CATCH(std::invalid_argument&) - { - JSON_THROW(parse_error(109, 0, "array index '" + reference_token + "' is not a number")); - } - break; - } - - default: - { - JSON_THROW(out_of_range(404, "unresolved reference token '" + reference_token + "'")); - } - } - } - - return *ptr; - } - - /*! 
- @brief split the string input to reference tokens - - @note This function is only called by the json_pointer constructor. - All exceptions below are documented there. - - @throw parse_error.107 if the pointer is not empty or begins with '/' - @throw parse_error.108 if character '~' is not followed by '0' or '1' - */ - static std::vector split(const std::string& reference_string) - { - std::vector result; - - // special case: empty reference string -> no reference tokens - if (reference_string.empty()) - { - return result; - } - - // check if nonempty reference string begins with slash - if (reference_string[0] != '/') - { - JSON_THROW(parse_error(107, 1, "JSON pointer must be empty or begin with '/' - was: '" + reference_string + "'")); - } - - // extract the reference tokens: - // - slash: position of the last read slash (or end of string) - // - start: position after the previous slash - for ( - // search for the first slash after the first character - size_t slash = reference_string.find_first_of('/', 1), - // set the beginning of the first reference token - start = 1; - // we can stop if start == string::npos+1 = 0 - start != 0; - // set the beginning of the next reference token - // (will eventually be 0 if slash == std::string::npos) - start = slash + 1, - // find next slash - slash = reference_string.find_first_of('/', start)) - { - // use the text between the beginning of the reference token - // (start) and the last slash (slash). 
- auto reference_token = reference_string.substr(start, slash - start); - - // check reference tokens are properly escaped - for (size_t pos = reference_token.find_first_of('~'); - pos != std::string::npos; - pos = reference_token.find_first_of('~', pos + 1)) - { - assert(reference_token[pos] == '~'); - - // ~ must be followed by 0 or 1 - if (pos == reference_token.size() - 1 or - (reference_token[pos + 1] != '0' and - reference_token[pos + 1] != '1')) - { - JSON_THROW(parse_error(108, 0, "escape character '~' must be followed with '0' or '1'")); - } - } - - // finally, store the reference token - unescape(reference_token); - result.push_back(reference_token); - } - - return result; - } - - /*! - @brief replace all occurrences of a substring by another string - - @param[in,out] s the string to manipulate; changed so that all - occurrences of @a f are replaced with @a t - @param[in] f the substring to replace with @a t - @param[in] t the string to replace @a f - - @pre The search string @a f must not be empty. **This precondition is - enforced with an assertion.** - - @since version 2.0.0 - */ - static void replace_substring(std::string& s, - const std::string& f, - const std::string& t) - { - assert(not f.empty()); - - for ( - size_t pos = s.find(f); // find first occurrence of f - pos != std::string::npos; // make sure f was found - s.replace(pos, f.size(), t), // replace with t - pos = s.find(f, pos + t.size()) // find next occurrence of f - ); - } - - /// escape tilde and slash - static std::string escape(std::string s) - { - // escape "~"" to "~0" and "/" to "~1" - replace_substring(s, "~", "~0"); - replace_substring(s, "/", "~1"); - return s; - } - - /// unescape tilde and slash - static void unescape(std::string& s) - { - // first transform any occurrence of the sequence '~1' to '/' - replace_substring(s, "~1", "/"); - // then transform any occurrence of the sequence '~0' to '~' - replace_substring(s, "~0", "~"); - } - - /*! 
- @param[in] reference_string the reference string to the current value - @param[in] value the value to consider - @param[in,out] result the result object to insert values to - - @note Empty objects or arrays are flattened to `null`. - */ - static void flatten(const std::string& reference_string, - const basic_json& value, - basic_json& result) - { - switch (value.m_type) - { - case value_t::array: - { - if (value.m_value.array->empty()) - { - // flatten empty array as null - result[reference_string] = nullptr; - } - else - { - // iterate array and use index as reference string - for (size_t i = 0; i < value.m_value.array->size(); ++i) - { - flatten(reference_string + "/" + std::to_string(i), - value.m_value.array->operator[](i), result); - } - } - break; - } - - case value_t::object: - { - if (value.m_value.object->empty()) - { - // flatten empty object as null - result[reference_string] = nullptr; - } - else - { - // iterate object and use keys as reference string - for (const auto& element : *value.m_value.object) - { - flatten(reference_string + "/" + escape(element.first), - element.second, result); - } - } - break; - } - - default: - { - // add primitive value with its reference string - result[reference_string] = value; - break; - } - } - } - - /*! 
- @param[in] value flattened JSON - - @return unflattened JSON - - @throw parse_error.109 if array index is not a number - @throw type_error.314 if value is not an object - @throw type_error.315 if object values are not primitive - @throw type_error.313 if value cannot be unflattened - */ - static basic_json unflatten(const basic_json& value) - { - if (not value.is_object()) - { - JSON_THROW(type_error(314, "only objects can be unflattened")); - } - - basic_json result; - - // iterate the JSON object values - for (const auto& element : *value.m_value.object) - { - if (not element.second.is_primitive()) - { - JSON_THROW(type_error(315, "values in object must be primitive")); - } - - // assign value to reference pointed to by JSON pointer; Note - // that if the JSON pointer is "" (i.e., points to the whole - // value), function get_and_create returns a reference to - // result itself. An assignment will then create a primitive - // value. - json_pointer(element.first).get_and_create(result) = element.second; - } - - return result; - } - - friend bool operator==(json_pointer const& lhs, - json_pointer const& rhs) noexcept - { - return lhs.reference_tokens == rhs.reference_tokens; - } - - friend bool operator!=(json_pointer const& lhs, - json_pointer const& rhs) noexcept - { - return !(lhs == rhs); - } - - /// the reference tokens - std::vector reference_tokens {}; - }; - - ////////////////////////// - // JSON Pointer support // - ////////////////////////// - - /// @name JSON Pointer functions - /// @{ - - /*! - @brief access specified element via JSON Pointer - - Uses a JSON pointer to retrieve a reference to the respective JSON value. - No bound checking is performed. Similar to @ref operator[](const typename - object_t::key_type&), `null` values are created in arrays and objects if - necessary. - - In particular: - - If the JSON pointer points to an object key that does not exist, it - is created an filled with a `null` value before a reference to it - is returned. 
- - If the JSON pointer points to an array index that does not exist, it - is created an filled with a `null` value before a reference to it - is returned. All indices between the current maximum and the given - index are also filled with `null`. - - The special value `-` is treated as a synonym for the index past the - end. - - @param[in] ptr a JSON pointer - - @return reference to the element pointed to by @a ptr - - @complexity Constant. - - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - @throw out_of_range.404 if the JSON pointer can not be resolved - - @liveexample{The behavior is shown in the example.,operatorjson_pointer} - - @since version 2.0.0 - */ - reference operator[](const json_pointer& ptr) - { - return ptr.get_unchecked(this); - } - - /*! - @brief access specified element via JSON Pointer - - Uses a JSON pointer to retrieve a reference to the respective JSON value. - No bound checking is performed. The function does not change the JSON - value; no `null` values are created. In particular, the the special value - `-` yields an exception. - - @param[in] ptr JSON pointer to the desired element - - @return const reference to the element pointed to by @a ptr - - @complexity Constant. - - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - @throw out_of_range.402 if the array index '-' is used - @throw out_of_range.404 if the JSON pointer can not be resolved - - @liveexample{The behavior is shown in the example.,operatorjson_pointer_const} - - @since version 2.0.0 - */ - const_reference operator[](const json_pointer& ptr) const - { - return ptr.get_unchecked(this); - } - - /*! - @brief access specified element via JSON Pointer - - Returns a reference to the element at with specified JSON pointer @a ptr, - with bounds checking. 
- - @param[in] ptr JSON pointer to the desired element - - @return reference to the element pointed to by @a ptr - - @throw parse_error.106 if an array index in the passed JSON pointer @a ptr - begins with '0'. See example below. - - @throw parse_error.109 if an array index in the passed JSON pointer @a ptr - is not a number. See example below. - - @throw out_of_range.401 if an array index in the passed JSON pointer @a ptr - is out of range. See example below. - - @throw out_of_range.402 if the array index '-' is used in the passed JSON - pointer @a ptr. As `at` provides checked access (and no elements are - implicitly inserted), the index '-' is always invalid. See example below. - - @throw out_of_range.404 if the JSON pointer @a ptr can not be resolved. - See example below. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Constant. - - @since version 2.0.0 - - @liveexample{The behavior is shown in the example.,at_json_pointer} - */ - reference at(const json_pointer& ptr) - { - return ptr.get_checked(this); - } - - /*! - @brief access specified element via JSON Pointer - - Returns a const reference to the element at with specified JSON pointer @a - ptr, with bounds checking. - - @param[in] ptr JSON pointer to the desired element - - @return reference to the element pointed to by @a ptr - - @throw parse_error.106 if an array index in the passed JSON pointer @a ptr - begins with '0'. See example below. - - @throw parse_error.109 if an array index in the passed JSON pointer @a ptr - is not a number. See example below. - - @throw out_of_range.401 if an array index in the passed JSON pointer @a ptr - is out of range. See example below. - - @throw out_of_range.402 if the array index '-' is used in the passed JSON - pointer @a ptr. As `at` provides checked access (and no elements are - implicitly inserted), the index '-' is always invalid. See example below. 
- - @throw out_of_range.404 if the JSON pointer @a ptr can not be resolved. - See example below. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Constant. - - @since version 2.0.0 - - @liveexample{The behavior is shown in the example.,at_json_pointer_const} - */ - const_reference at(const json_pointer& ptr) const - { - return ptr.get_checked(this); - } - - /*! - @brief return flattened JSON value - - The function creates a JSON object whose keys are JSON pointers (see [RFC - 6901](https://tools.ietf.org/html/rfc6901)) and whose values are all - primitive. The original JSON value can be restored using the @ref - unflatten() function. - - @return an object that maps JSON pointers to primitive values - - @note Empty objects and arrays are flattened to `null` and will not be - reconstructed correctly by the @ref unflatten() function. - - @complexity Linear in the size the JSON value. - - @liveexample{The following code shows how a JSON object is flattened to an - object whose keys consist of JSON pointers.,flatten} - - @sa @ref unflatten() for the reverse function - - @since version 2.0.0 - */ - basic_json flatten() const - { - basic_json result(value_t::object); - json_pointer::flatten("", *this, result); - return result; - } - - /*! - @brief unflatten a previously flattened JSON value - - The function restores the arbitrary nesting of a JSON value that has been - flattened before using the @ref flatten() function. The JSON value must - meet certain constraints: - 1. The value must be an object. - 2. The keys must be JSON pointers (see - [RFC 6901](https://tools.ietf.org/html/rfc6901)) - 3. The mapped values must be primitive JSON types. - - @return the original JSON from a flattened version - - @note Empty objects and arrays are flattened by @ref flatten() to `null` - values and can not unflattened to their original type. 
Apart from - this example, for a JSON value `j`, the following is always true: - `j == j.flatten().unflatten()`. - - @complexity Linear in the size the JSON value. - - @throw type_error.314 if value is not an object - @throw type_error.315 if object values are not primitve - - @liveexample{The following code shows how a flattened JSON object is - unflattened into the original nested JSON object.,unflatten} - - @sa @ref flatten() for the reverse function - - @since version 2.0.0 - */ - basic_json unflatten() const - { - return json_pointer::unflatten(*this); - } - - /// @} - - ////////////////////////// - // JSON Patch functions // - ////////////////////////// - - /// @name JSON Patch functions - /// @{ - - /*! - @brief applies a JSON patch - - [JSON Patch](http://jsonpatch.com) defines a JSON document structure for - expressing a sequence of operations to apply to a JSON) document. With - this function, a JSON Patch is applied to the current JSON value by - executing all operations from the patch. - - @param[in] json_patch JSON patch document - @return patched document - - @note The application of a patch is atomic: Either all operations succeed - and the patched document is returned or an exception is thrown. In - any case, the original value is not changed: the patch is applied - to a copy of the value. - - @throw parse_error.104 if the JSON patch does not consist of an array of - objects - - @throw parse_error.105 if the JSON patch is malformed (e.g., mandatory - attributes are missing); example: `"operation add must have member path"` - - @throw out_of_range.401 if an array index is out of range. 
- - @throw out_of_range.403 if a JSON pointer inside the patch could not be - resolved successfully in the current JSON value; example: `"key baz not - found"` - - @throw out_of_range.405 if JSON pointer has no parent ("add", "remove", - "move") - - @throw other_error.501 if "test" operation was unsuccessful - - @complexity Linear in the size of the JSON value and the length of the - JSON patch. As usually only a fraction of the JSON value is affected by - the patch, the complexity can usually be neglected. - - @liveexample{The following code shows how a JSON patch is applied to a - value.,patch} - - @sa @ref diff -- create a JSON patch by comparing two JSON values - - @sa [RFC 6902 (JSON Patch)](https://tools.ietf.org/html/rfc6902) - @sa [RFC 6901 (JSON Pointer)](https://tools.ietf.org/html/rfc6901) - - @since version 2.0.0 - */ - basic_json patch(const basic_json& json_patch) const - { - // make a working copy to apply the patch to - basic_json result = *this; - - // the valid JSON Patch operations - enum class patch_operations {add, remove, replace, move, copy, test, invalid}; - - const auto get_op = [](const std::string & op) - { - if (op == "add") - { - return patch_operations::add; - } - if (op == "remove") - { - return patch_operations::remove; - } - if (op == "replace") - { - return patch_operations::replace; - } - if (op == "move") - { - return patch_operations::move; - } - if (op == "copy") - { - return patch_operations::copy; - } - if (op == "test") - { - return patch_operations::test; - } - - return patch_operations::invalid; - }; - - // wrapper for "add" operation; add value at ptr - const auto operation_add = [&result](json_pointer & ptr, basic_json val) - { - // adding to the root of the target document means replacing it - if (ptr.is_root()) - { - result = val; - } - else - { - // make sure the top element of the pointer exists - json_pointer top_pointer = ptr.top(); - if (top_pointer != ptr) - { - result.at(top_pointer); - } - - // get reference to 
parent of JSON pointer ptr - const auto last_path = ptr.pop_back(); - basic_json& parent = result[ptr]; - - switch (parent.m_type) - { - case value_t::null: - case value_t::object: - { - // use operator[] to add value - parent[last_path] = val; - break; - } - - case value_t::array: - { - if (last_path == "-") - { - // special case: append to back - parent.push_back(val); - } - else - { - const auto idx = std::stoi(last_path); - if (static_cast(idx) > parent.size()) - { - // avoid undefined behavior - JSON_THROW(out_of_range(401, "array index " + std::to_string(idx) + " is out of range")); - } - else - { - // default case: insert add offset - parent.insert(parent.begin() + static_cast(idx), val); - } - } - break; - } - - default: - { - // if there exists a parent it cannot be primitive - assert(false); // LCOV_EXCL_LINE - } - } - } - }; - - // wrapper for "remove" operation; remove value at ptr - const auto operation_remove = [&result](json_pointer & ptr) - { - // get reference to parent of JSON pointer ptr - const auto last_path = ptr.pop_back(); - basic_json& parent = result.at(ptr); - - // remove child - if (parent.is_object()) - { - // perform range check - auto it = parent.find(last_path); - if (it != parent.end()) - { - parent.erase(it); - } - else - { - JSON_THROW(out_of_range(403, "key '" + last_path + "' not found")); - } - } - else if (parent.is_array()) - { - // note erase performs range check - parent.erase(static_cast(std::stoi(last_path))); - } - }; - - // type check: top level value must be an array - if (not json_patch.is_array()) - { - JSON_THROW(parse_error(104, 0, "JSON patch must be an array of objects")); - } - - // iterate and apply the operations - for (const auto& val : json_patch) - { - // wrapper to get a value for an operation - const auto get_value = [&val](const std::string & op, - const std::string & member, - bool string_type) -> basic_json& - { - // find value - auto it = val.m_value.object->find(member); - - // context-sensitive 
error message - const auto error_msg = (op == "op") ? "operation" : "operation '" + op + "'"; - - // check if desired value is present - if (it == val.m_value.object->end()) - { - JSON_THROW(parse_error(105, 0, error_msg + " must have member '" + member + "'")); - } - - // check if result is of type string - if (string_type and not it->second.is_string()) - { - JSON_THROW(parse_error(105, 0, error_msg + " must have string member '" + member + "'")); - } - - // no error: return value - return it->second; - }; - - // type check: every element of the array must be an object - if (not val.is_object()) - { - JSON_THROW(parse_error(104, 0, "JSON patch must be an array of objects")); - } - - // collect mandatory members - const std::string op = get_value("op", "op", true); - const std::string path = get_value(op, "path", true); - json_pointer ptr(path); - - switch (get_op(op)) - { - case patch_operations::add: - { - operation_add(ptr, get_value("add", "value", false)); - break; - } - - case patch_operations::remove: - { - operation_remove(ptr); - break; - } - - case patch_operations::replace: - { - // the "path" location must exist - use at() - result.at(ptr) = get_value("replace", "value", false); - break; - } - - case patch_operations::move: - { - const std::string from_path = get_value("move", "from", true); - json_pointer from_ptr(from_path); - - // the "from" location must exist - use at() - basic_json v = result.at(from_ptr); - - // The move operation is functionally identical to a - // "remove" operation on the "from" location, followed - // immediately by an "add" operation at the target - // location with the value that was just removed. 
- operation_remove(from_ptr); - operation_add(ptr, v); - break; - } - - case patch_operations::copy: - { - const std::string from_path = get_value("copy", "from", true);; - const json_pointer from_ptr(from_path); - - // the "from" location must exist - use at() - result[ptr] = result.at(from_ptr); - break; - } - - case patch_operations::test: - { - bool success = false; - JSON_TRY - { - // check if "value" matches the one at "path" - // the "path" location must exist - use at() - success = (result.at(ptr) == get_value("test", "value", false)); - } - JSON_CATCH (out_of_range&) - { - // ignore out of range errors: success remains false - } - - // throw an exception if test fails - if (not success) - { - JSON_THROW(other_error(501, "unsuccessful: " + val.dump())); - } - - break; - } - - case patch_operations::invalid: - { - // op must be "add", "remove", "replace", "move", "copy", or - // "test" - JSON_THROW(parse_error(105, 0, "operation value '" + op + "' is invalid")); - } - } - } - - return result; - } - - /*! - @brief creates a diff as a JSON patch - - Creates a [JSON Patch](http://jsonpatch.com) so that value @a source can - be changed into the value @a target by calling @ref patch function. - - @invariant For two JSON values @a source and @a target, the following code - yields always `true`: - @code {.cpp} - source.patch(diff(source, target)) == target; - @endcode - - @note Currently, only `remove`, `add`, and `replace` operations are - generated. - - @param[in] source JSON value to compare from - @param[in] target JSON value to compare against - @param[in] path helper value to create JSON pointers - - @return a JSON patch to convert the @a source to @a target - - @complexity Linear in the lengths of @a source and @a target. 
- - @liveexample{The following code shows how a JSON patch is created as a - diff for two JSON values.,diff} - - @sa @ref patch -- apply a JSON patch - - @sa [RFC 6902 (JSON Patch)](https://tools.ietf.org/html/rfc6902) - - @since version 2.0.0 - */ - static basic_json diff(const basic_json& source, - const basic_json& target, - const std::string& path = "") - { - // the patch - basic_json result(value_t::array); - - // if the values are the same, return empty patch - if (source == target) - { - return result; - } - - if (source.type() != target.type()) - { - // different types: replace value - result.push_back( - { - {"op", "replace"}, - {"path", path}, - {"value", target} - }); - } - else - { - switch (source.type()) - { - case value_t::array: - { - // first pass: traverse common elements - size_t i = 0; - while (i < source.size() and i < target.size()) - { - // recursive call to compare array values at index i - auto temp_diff = diff(source[i], target[i], path + "/" + std::to_string(i)); - result.insert(result.end(), temp_diff.begin(), temp_diff.end()); - ++i; - } - - // i now reached the end of at least one array - // in a second pass, traverse the remaining elements - - // remove my remaining elements - const auto end_index = static_cast(result.size()); - while (i < source.size()) - { - // add operations in reverse order to avoid invalid - // indices - result.insert(result.begin() + end_index, object( - { - {"op", "remove"}, - {"path", path + "/" + std::to_string(i)} - })); - ++i; - } - - // add other remaining elements - while (i < target.size()) - { - result.push_back( - { - {"op", "add"}, - {"path", path + "/" + std::to_string(i)}, - {"value", target[i]} - }); - ++i; - } - - break; - } - - case value_t::object: - { - // first pass: traverse this object's elements - for (auto it = source.begin(); it != source.end(); ++it) - { - // escape the key name to be used in a JSON patch - const auto key = json_pointer::escape(it.key()); - - if (target.find(it.key()) != 
target.end()) - { - // recursive call to compare object values at key it - auto temp_diff = diff(it.value(), target[it.key()], path + "/" + key); - result.insert(result.end(), temp_diff.begin(), temp_diff.end()); - } - else - { - // found a key that is not in o -> remove it - result.push_back(object( - { - {"op", "remove"}, - {"path", path + "/" + key} - })); - } - } - - // second pass: traverse other object's elements - for (auto it = target.begin(); it != target.end(); ++it) - { - if (source.find(it.key()) == source.end()) - { - // found a key that is not in this -> add it - const auto key = json_pointer::escape(it.key()); - result.push_back( - { - {"op", "add"}, - {"path", path + "/" + key}, - {"value", it.value()} - }); - } - } - - break; - } - - default: - { - // both primitive type: replace value - result.push_back( - { - {"op", "replace"}, - {"path", path}, - {"value", target} - }); - break; - } - } - } - - return result; - } - - /// @} -}; - -///////////// -// presets // -///////////// - -/*! -@brief default JSON class - -This type is the default specialization of the @ref basic_json class which -uses the standard template types. - -@since version 1.0.0 -*/ -using json = basic_json<>; -} // namespace nlohmann - - -/////////////////////// -// nonmember support // -/////////////////////// - -// specialization of std::swap, and std::hash -namespace std -{ -/*! -@brief exchanges the values of two JSON objects - -@since version 1.0.0 -*/ -template<> -inline void swap(nlohmann::json& j1, - nlohmann::json& j2) noexcept( - is_nothrow_move_constructible::value and - is_nothrow_move_assignable::value - ) -{ - j1.swap(j2); -} - -/// hash value for JSON objects -template<> -struct hash -{ - /*! 
- @brief return a hash value for a JSON object - - @since version 1.0.0 - */ - std::size_t operator()(const nlohmann::json& j) const - { - // a naive hashing via the string representation - const auto& h = hash(); - return h(j.dump()); - } -}; - -/// specialization for std::less -template <> -struct less<::nlohmann::detail::value_t> -{ - /*! - @brief compare two value_t enum values - @since version 3.0.0 - */ - bool operator()(nlohmann::detail::value_t lhs, - nlohmann::detail::value_t rhs) const noexcept - { - return nlohmann::detail::operator<(lhs, rhs); - } -}; - -} // namespace std - -/*! -@brief user-defined string literal for JSON values - -This operator implements a user-defined string literal for JSON objects. It -can be used by adding `"_json"` to a string literal and returns a JSON object -if no parse error occurred. - -@param[in] s a string representation of a JSON object -@param[in] n the length of string @a s -@return a JSON object - -@since version 1.0.0 -*/ -inline nlohmann::json operator "" _json(const char* s, std::size_t n) -{ - return nlohmann::json::parse(s, s + n); -} - -/*! -@brief user-defined string literal for JSON pointer - -This operator implements a user-defined string literal for JSON Pointers. It -can be used by adding `"_json_pointer"` to a string literal and returns a JSON pointer -object if no parse error occurred. 
- -@param[in] s a string representation of a JSON Pointer -@param[in] n the length of string @a s -@return a JSON pointer object - -@since version 2.0.0 -*/ -inline nlohmann::json::json_pointer operator "" _json_pointer(const char* s, std::size_t n) -{ - return nlohmann::json::json_pointer(std::string(s, n)); -} - -// restore GCC/clang diagnostic settings -#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) - #pragma GCC diagnostic pop -#endif - -// clean up -#undef JSON_CATCH -#undef JSON_THROW -#undef JSON_TRY - -#endif diff --git a/test/src/unit-class_lexer.cpp b/test/src/unit-class_lexer.cpp index b88a5291..d4bf2dd2 100644 --- a/test/src/unit-class_lexer.cpp +++ b/test/src/unit-class_lexer.cpp @@ -38,79 +38,50 @@ TEST_CASE("lexer class") { SECTION("structural characters") { - CHECK((json::lexer(reinterpret_cast("["), - 1).scan() == json::lexer::token_type::begin_array)); - CHECK((json::lexer(reinterpret_cast("]"), - 1).scan() == json::lexer::token_type::end_array)); - CHECK((json::lexer(reinterpret_cast("{"), - 1).scan() == json::lexer::token_type::begin_object)); - CHECK((json::lexer(reinterpret_cast("}"), - 1).scan() == json::lexer::token_type::end_object)); - CHECK((json::lexer(reinterpret_cast(","), - 1).scan() == json::lexer::token_type::value_separator)); - CHECK((json::lexer(reinterpret_cast(":"), - 1).scan() == json::lexer::token_type::name_separator)); + CHECK((json::lexer("[", 1).scan() == json::lexer::token_type::begin_array)); + CHECK((json::lexer("]", 1).scan() == json::lexer::token_type::end_array)); + CHECK((json::lexer("{", 1).scan() == json::lexer::token_type::begin_object)); + CHECK((json::lexer("}", 1).scan() == json::lexer::token_type::end_object)); + CHECK((json::lexer(",", 1).scan() == json::lexer::token_type::value_separator)); + CHECK((json::lexer(":", 1).scan() == json::lexer::token_type::name_separator)); } SECTION("literal names") { - CHECK((json::lexer(reinterpret_cast("null"), - 4).scan() == 
json::lexer::token_type::literal_null)); - CHECK((json::lexer(reinterpret_cast("true"), - 4).scan() == json::lexer::token_type::literal_true)); - CHECK((json::lexer(reinterpret_cast("false"), - 5).scan() == json::lexer::token_type::literal_false)); + CHECK((json::lexer("null", 4).scan() == json::lexer::token_type::literal_null)); + CHECK((json::lexer("true", 4).scan() == json::lexer::token_type::literal_true)); + CHECK((json::lexer("false", 5).scan() == json::lexer::token_type::literal_false)); } SECTION("numbers") { - CHECK((json::lexer(reinterpret_cast("0"), - 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer(reinterpret_cast("1"), - 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer(reinterpret_cast("2"), - 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer(reinterpret_cast("3"), - 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer(reinterpret_cast("4"), - 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer(reinterpret_cast("5"), - 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer(reinterpret_cast("6"), - 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer(reinterpret_cast("7"), - 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer(reinterpret_cast("8"), - 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer(reinterpret_cast("9"), - 1).scan() == json::lexer::token_type::value_unsigned)); + CHECK((json::lexer("0", 1).scan() == json::lexer::token_type::value_unsigned)); + CHECK((json::lexer("1", 1).scan() == json::lexer::token_type::value_unsigned)); + CHECK((json::lexer("2", 1).scan() == json::lexer::token_type::value_unsigned)); + CHECK((json::lexer("3", 1).scan() == json::lexer::token_type::value_unsigned)); + CHECK((json::lexer("4", 1).scan() == json::lexer::token_type::value_unsigned)); + CHECK((json::lexer("5", 1).scan() == 
json::lexer::token_type::value_unsigned)); + CHECK((json::lexer("6", 1).scan() == json::lexer::token_type::value_unsigned)); + CHECK((json::lexer("7", 1).scan() == json::lexer::token_type::value_unsigned)); + CHECK((json::lexer("8", 1).scan() == json::lexer::token_type::value_unsigned)); + CHECK((json::lexer("9", 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer(reinterpret_cast("-0"), - 2).scan() == json::lexer::token_type::value_integer)); - CHECK((json::lexer(reinterpret_cast("-1"), - 2).scan() == json::lexer::token_type::value_integer)); + CHECK((json::lexer("-0", 2).scan() == json::lexer::token_type::value_integer)); + CHECK((json::lexer("-1", 2).scan() == json::lexer::token_type::value_integer)); - CHECK((json::lexer(reinterpret_cast("1.1"), - 3).scan() == json::lexer::token_type::value_float)); - CHECK((json::lexer(reinterpret_cast("-1.1"), - 4).scan() == json::lexer::token_type::value_float)); - CHECK((json::lexer(reinterpret_cast("1E10"), - 4).scan() == json::lexer::token_type::value_float)); + CHECK((json::lexer("1.1", 3).scan() == json::lexer::token_type::value_float)); + CHECK((json::lexer("-1.1", 4).scan() == json::lexer::token_type::value_float)); + CHECK((json::lexer("1E10", 4).scan() == json::lexer::token_type::value_float)); } SECTION("whitespace") { // result is end_of_input, because not token is following - CHECK((json::lexer(reinterpret_cast(" "), - 1).scan() == json::lexer::token_type::end_of_input)); - CHECK((json::lexer(reinterpret_cast("\t"), - 1).scan() == json::lexer::token_type::end_of_input)); - CHECK((json::lexer(reinterpret_cast("\n"), - 1).scan() == json::lexer::token_type::end_of_input)); - CHECK((json::lexer(reinterpret_cast("\r"), - 1).scan() == json::lexer::token_type::end_of_input)); - CHECK((json::lexer(reinterpret_cast(" \t\n\r\n\t "), - 7).scan() == json::lexer::token_type::end_of_input)); + CHECK((json::lexer(" ", 1).scan() == json::lexer::token_type::end_of_input)); + CHECK((json::lexer("\t", 
1).scan() == json::lexer::token_type::end_of_input)); + CHECK((json::lexer("\n", 1).scan() == json::lexer::token_type::end_of_input)); + CHECK((json::lexer("\r", 1).scan() == json::lexer::token_type::end_of_input)); + CHECK((json::lexer(" \t\n\r\n\t ", 7).scan() == json::lexer::token_type::end_of_input)); } } @@ -141,8 +112,7 @@ TEST_CASE("lexer class") // create string from the ASCII code const auto s = std::string(1, static_cast(c)); // store scan() result - const auto res = json::lexer(reinterpret_cast(s.c_str()), - 1).scan(); + const auto res = json::lexer(s.c_str(), 1).scan(); switch (c) { @@ -188,12 +158,14 @@ TEST_CASE("lexer class") } } + /* NOTE: to_unicode function has been removed SECTION("to_unicode") { // lexer to call to_unicode on - json::lexer dummy_lexer(reinterpret_cast(""), 0); + json::lexer dummy_lexer("", 0); CHECK(dummy_lexer.to_unicode(0x1F4A9) == "💩"); CHECK_THROWS_AS(dummy_lexer.to_unicode(0x200000), json::parse_error); CHECK_THROWS_WITH(dummy_lexer.to_unicode(0x200000), "[json.exception.parse_error.103] parse error: code points above 0x10FFFF are invalid"); } + */ } diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index 2a5c07cf..fe2a8fbe 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -91,18 +91,18 @@ TEST_CASE("parser class") // error: tab in string CHECK_THROWS_AS(json::parser("\"\t\"").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("\"\t\"").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); // error: newline in string CHECK_THROWS_AS(json::parser("\"\n\"").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("\"\r\"").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("\"\n\"").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '\"'"); + "[json.exception.parse_error.101] parse 
error at 1: syntax error - unexpected '\"'"); CHECK_THROWS_WITH(json::parser("\"\r\"").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); // error: backspace in string CHECK_THROWS_AS(json::parser("\"\b\"").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("\"\b\"").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); // improve code coverage CHECK_THROWS_AS(json::parser("\uFF01").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("[-4:1,]").parse(), json::parse_error); @@ -306,39 +306,39 @@ TEST_CASE("parser class") CHECK_THROWS_AS(json::parser("+0").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("01").parse(), - "[json.exception.parse_error.101] parse error at 2: parse error - unexpected '01'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected '01'"); CHECK_THROWS_WITH(json::parser("-01").parse(), - "[json.exception.parse_error.101] parse error at 3: parse error - unexpected '-01'"); + "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected '-01'"); CHECK_THROWS_WITH(json::parser("--1").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '-'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '-'"); CHECK_THROWS_WITH(json::parser("1.").parse(), - "[json.exception.parse_error.101] parse error at 2: parse error - unexpected '.'; expected end of input"); + "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected '.'; expected end of input"); CHECK_THROWS_WITH(json::parser("1E").parse(), - "[json.exception.parse_error.101] parse error at 2: parse error - unexpected 'E'; expected end of input"); + "[json.exception.parse_error.101] parse 
error at 2: syntax error - unexpected 'E'; expected end of input"); CHECK_THROWS_WITH(json::parser("1E-").parse(), - "[json.exception.parse_error.101] parse error at 2: parse error - unexpected 'E'; expected end of input"); + "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected 'E'; expected end of input"); CHECK_THROWS_WITH(json::parser("1.E1").parse(), - "[json.exception.parse_error.101] parse error at 2: parse error - unexpected '.'; expected end of input"); + "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected '.'; expected end of input"); CHECK_THROWS_WITH(json::parser("-1E").parse(), - "[json.exception.parse_error.101] parse error at 3: parse error - unexpected 'E'; expected end of input"); + "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected 'E'; expected end of input"); CHECK_THROWS_WITH(json::parser("-0E#").parse(), - "[json.exception.parse_error.101] parse error at 3: parse error - unexpected 'E'; expected end of input"); + "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected 'E'; expected end of input"); CHECK_THROWS_WITH(json::parser("-0E-#").parse(), - "[json.exception.parse_error.101] parse error at 3: parse error - unexpected 'E'; expected end of input"); + "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected 'E'; expected end of input"); CHECK_THROWS_WITH(json::parser("-0#").parse(), - "[json.exception.parse_error.101] parse error at 3: parse error - unexpected '#'; expected end of input"); + "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected '#'; expected end of input"); CHECK_THROWS_WITH(json::parser("-0.0:").parse(), - "[json.exception.parse_error.101] parse error at 5: parse error - unexpected ':'; expected end of input"); + "[json.exception.parse_error.101] parse error at 5: syntax error - unexpected ':'; expected end of input"); CHECK_THROWS_WITH(json::parser("-0.0Z").parse(), - 
"[json.exception.parse_error.101] parse error at 5: parse error - unexpected 'Z'; expected end of input"); + "[json.exception.parse_error.101] parse error at 5: syntax error - unexpected 'Z'; expected end of input"); CHECK_THROWS_WITH(json::parser("-0E123:").parse(), - "[json.exception.parse_error.101] parse error at 7: parse error - unexpected ':'; expected end of input"); + "[json.exception.parse_error.101] parse error at 7: syntax error - unexpected ':'; expected end of input"); CHECK_THROWS_WITH(json::parser("-0e0-:").parse(), - "[json.exception.parse_error.101] parse error at 5: parse error - unexpected '-'; expected end of input"); + "[json.exception.parse_error.101] parse error at 5: syntax error - unexpected '-'; expected end of input"); CHECK_THROWS_WITH(json::parser("-0e-:").parse(), - "[json.exception.parse_error.101] parse error at 3: parse error - unexpected 'e'; expected end of input"); + "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected 'e'; expected end of input"); CHECK_THROWS_WITH(json::parser("-0f").parse(), - "[json.exception.parse_error.101] parse error at 3: parse error - unexpected 'f'; expected end of input"); + "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected 'f'; expected end of input"); } } } @@ -361,66 +361,66 @@ TEST_CASE("parser class") CHECK_THROWS_AS(json::parser("1E/").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("1E:").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("0.").parse(), - "[json.exception.parse_error.101] parse error at 2: parse error - unexpected '.'; expected end of input"); + "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected '.'; expected end of input"); CHECK_THROWS_WITH(json::parser("-").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '-'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '-'"); 
CHECK_THROWS_WITH(json::parser("--").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '-'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '-'"); CHECK_THROWS_WITH(json::parser("-0.").parse(), - "[json.exception.parse_error.101] parse error at 3: parse error - unexpected '.'; expected end of input"); + "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected '.'; expected end of input"); CHECK_THROWS_WITH(json::parser("-.").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '-'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '-'"); CHECK_THROWS_WITH(json::parser("-:").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '-'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '-'"); CHECK_THROWS_WITH(json::parser("0.:").parse(), - "[json.exception.parse_error.101] parse error at 2: parse error - unexpected '.'; expected end of input"); + "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected '.'; expected end of input"); CHECK_THROWS_WITH(json::parser("e.").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected 'e'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'e'"); CHECK_THROWS_WITH(json::parser("1e.").parse(), - "[json.exception.parse_error.101] parse error at 2: parse error - unexpected 'e'; expected end of input"); + "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected 'e'; expected end of input"); CHECK_THROWS_WITH(json::parser("1e/").parse(), - "[json.exception.parse_error.101] parse error at 2: parse error - unexpected 'e'; expected end of input"); + "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected 'e'; expected end of input"); CHECK_THROWS_WITH(json::parser("1e:").parse(), - 
"[json.exception.parse_error.101] parse error at 2: parse error - unexpected 'e'; expected end of input"); + "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected 'e'; expected end of input"); CHECK_THROWS_WITH(json::parser("1E.").parse(), - "[json.exception.parse_error.101] parse error at 2: parse error - unexpected 'E'; expected end of input"); + "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected 'E'; expected end of input"); CHECK_THROWS_WITH(json::parser("1E/").parse(), - "[json.exception.parse_error.101] parse error at 2: parse error - unexpected 'E'; expected end of input"); + "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected 'E'; expected end of input"); CHECK_THROWS_WITH(json::parser("1E:").parse(), - "[json.exception.parse_error.101] parse error at 2: parse error - unexpected 'E'; expected end of input"); + "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected 'E'; expected end of input"); // unexpected end of null CHECK_THROWS_AS(json::parser("n").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("nu").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("nul").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("n").parse(), "[json.exception.parse_error.101] parse error at 1: parse error - unexpected 'n'"); + CHECK_THROWS_WITH(json::parser("n").parse(), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'n'"); CHECK_THROWS_WITH(json::parser("nu").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected 'n'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'n'"); CHECK_THROWS_WITH(json::parser("nul").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected 'n'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'n'"); // unexpected end of true 
CHECK_THROWS_AS(json::parser("t").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("tr").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("tru").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("t").parse(), "[json.exception.parse_error.101] parse error at 1: parse error - unexpected 't'"); + CHECK_THROWS_WITH(json::parser("t").parse(), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 't'"); CHECK_THROWS_WITH(json::parser("tr").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected 't'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 't'"); CHECK_THROWS_WITH(json::parser("tru").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected 't'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 't'"); // unexpected end of false CHECK_THROWS_AS(json::parser("f").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("fa").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("fal").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("fals").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("f").parse(), "[json.exception.parse_error.101] parse error at 1: parse error - unexpected 'f'"); + CHECK_THROWS_WITH(json::parser("f").parse(), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'"); CHECK_THROWS_WITH(json::parser("fa").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected 'f'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'"); CHECK_THROWS_WITH(json::parser("fal").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected 'f'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'"); CHECK_THROWS_WITH(json::parser("fals").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - 
unexpected 'f'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'"); // missing/unexpected end of array CHECK_THROWS_AS(json::parser("[").parse(), json::parse_error); @@ -429,15 +429,15 @@ TEST_CASE("parser class") CHECK_THROWS_AS(json::parser("[1,]").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("]").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("[").parse(), - "[json.exception.parse_error.101] parse error at 2: parse error - unexpected end of input"); + "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected end of input"); CHECK_THROWS_WITH(json::parser("[1").parse(), - "[json.exception.parse_error.101] parse error at 3: parse error - unexpected end of input; expected ']'"); + "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected end of input; expected ']'"); CHECK_THROWS_WITH(json::parser("[1,").parse(), - "[json.exception.parse_error.101] parse error at 4: parse error - unexpected end of input"); + "[json.exception.parse_error.101] parse error at 4: syntax error - unexpected end of input"); CHECK_THROWS_WITH(json::parser("[1,]").parse(), - "[json.exception.parse_error.101] parse error at 4: parse error - unexpected ']'"); + "[json.exception.parse_error.101] parse error at 4: syntax error - unexpected ']'"); CHECK_THROWS_WITH(json::parser("]").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected ']'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected ']'"); // missing/unexpected end of object CHECK_THROWS_AS(json::parser("{").parse(), json::parse_error); @@ -447,17 +447,17 @@ TEST_CASE("parser class") CHECK_THROWS_AS(json::parser("{\"foo\":1,}").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("}").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("{").parse(), - "[json.exception.parse_error.101] parse error at 2: parse error - unexpected end of input; expected string 
literal"); + "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected end of input; expected string literal"); CHECK_THROWS_WITH(json::parser("{\"foo\"").parse(), - "[json.exception.parse_error.101] parse error at 7: parse error - unexpected end of input; expected ':'"); + "[json.exception.parse_error.101] parse error at 7: syntax error - unexpected end of input; expected ':'"); CHECK_THROWS_WITH(json::parser("{\"foo\":").parse(), - "[json.exception.parse_error.101] parse error at 8: parse error - unexpected end of input"); + "[json.exception.parse_error.101] parse error at 8: syntax error - unexpected end of input"); CHECK_THROWS_WITH(json::parser("{\"foo\":}").parse(), - "[json.exception.parse_error.101] parse error at 8: parse error - unexpected '}'"); + "[json.exception.parse_error.101] parse error at 8: syntax error - unexpected '}'"); CHECK_THROWS_WITH(json::parser("{\"foo\":1,}").parse(), - "[json.exception.parse_error.101] parse error at 10: parse error - unexpected '}'; expected string literal"); + "[json.exception.parse_error.101] parse error at 10: syntax error - unexpected '}'; expected string literal"); CHECK_THROWS_WITH(json::parser("}").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '}'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '}'"); // missing/unexpected end of string CHECK_THROWS_AS(json::parser("\"").parse(), json::parse_error); @@ -471,25 +471,25 @@ TEST_CASE("parser class") CHECK_THROWS_AS(json::parser("\"\\u01").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("\"\\u012").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("\"").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); CHECK_THROWS_WITH(json::parser("\"\\\"").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - 
unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); CHECK_THROWS_WITH(json::parser("\"\\u\"").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); CHECK_THROWS_WITH(json::parser("\"\\u0\"").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); CHECK_THROWS_WITH(json::parser("\"\\u01\"").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); CHECK_THROWS_WITH(json::parser("\"\\u012\"").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); CHECK_THROWS_WITH(json::parser("\"\\u").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); CHECK_THROWS_WITH(json::parser("\"\\u0").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); CHECK_THROWS_WITH(json::parser("\"\\u01").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); CHECK_THROWS_WITH(json::parser("\"\\u012").parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); // invalid escapes for (int c = 1; c < 128; ++c) @@ -523,7 +523,7 @@ 
TEST_CASE("parser class") { CHECK_THROWS_AS(json::parser(s.c_str()).parse(), json::parse_error); CHECK_THROWS_WITH(json::parser(s.c_str()).parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); break; } } @@ -594,13 +594,13 @@ TEST_CASE("parser class") CHECK_THROWS_AS(json::parser(s4.c_str()).parse(), json::parse_error); CHECK_THROWS_WITH(json::parser(s1.c_str()).parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); CHECK_THROWS_WITH(json::parser(s2.c_str()).parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); CHECK_THROWS_WITH(json::parser(s3.c_str()).parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); CHECK_THROWS_WITH(json::parser(s4.c_str()).parse(), - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); } } } @@ -626,11 +626,11 @@ TEST_CASE("parser class") // test case to make sure no comma preceeds the first key CHECK_THROWS_AS(json::parser("{,\"key\": false}").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("{,\"key\": false}").parse(), - "[json.exception.parse_error.101] parse error at 2: parse error - unexpected ','"); + "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected ','"); // test case to make sure an object is properly closed CHECK_THROWS_AS(json::parser("[{\"key\": false true]").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("[{\"key\": false true]").parse(), - 
"[json.exception.parse_error.101] parse error at 19: parse error - unexpected true literal; expected '}'"); + "[json.exception.parse_error.101] parse error at 19: syntax error - unexpected true literal; expected '}'"); // test case to make sure the callback is properly evaluated after reading a key { diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index cacf6687..49a642b1 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -92,7 +92,7 @@ TEST_CASE("deserialization") ss2 << "[\"foo\",1,2,3,false,{\"one\":1}"; CHECK_THROWS_AS(json::parse(ss1), json::parse_error); CHECK_THROWS_WITH(json::parse(ss2), - "[json.exception.parse_error.101] parse error at 30: parse error - unexpected end of input; expected ']'"); + "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'"); } SECTION("string") @@ -100,7 +100,7 @@ TEST_CASE("deserialization") json::string_t s = "[\"foo\",1,2,3,false,{\"one\":1}"; CHECK_THROWS_AS(json::parse(s), json::parse_error); CHECK_THROWS_WITH(json::parse(s), - "[json.exception.parse_error.101] parse error at 29: parse error - unexpected end of input; expected ']'"); + "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'"); } SECTION("operator<<") @@ -111,7 +111,7 @@ TEST_CASE("deserialization") json j; CHECK_THROWS_AS(j << ss1, json::parse_error); CHECK_THROWS_WITH(j << ss2, - "[json.exception.parse_error.101] parse error at 30: parse error - unexpected end of input; expected ']'"); + "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'"); } SECTION("operator>>") @@ -122,14 +122,14 @@ TEST_CASE("deserialization") json j; CHECK_THROWS_AS(ss1 >> j, json::parse_error); CHECK_THROWS_WITH(ss2 >> j, - "[json.exception.parse_error.101] parse error at 30: parse error - unexpected end of input; expected ']'"); + 
"[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'"); } SECTION("user-defined string literal") { CHECK_THROWS_AS("[\"foo\",1,2,3,false,{\"one\":1}"_json, json::parse_error); CHECK_THROWS_WITH("[\"foo\",1,2,3,false,{\"one\":1}"_json, - "[json.exception.parse_error.101] parse error at 29: parse error - unexpected end of input; expected ']'"); + "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'"); } } diff --git a/test/src/unit-regression.cpp b/test/src/unit-regression.cpp index f7f4c350..2c09abaa 100644 --- a/test/src/unit-regression.cpp +++ b/test/src/unit-regression.cpp @@ -594,7 +594,7 @@ TEST_CASE("regression tests") // a parse error because of the EOF. CHECK_THROWS_AS(j << ss, json::parse_error); CHECK_THROWS_WITH(j << ss, - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected end of input"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input"); } SECTION("issue #389 - Integer-overflow (OSS-Fuzz issue 267)") @@ -911,6 +911,7 @@ TEST_CASE("regression tests") CHECK(j["bool_vector"].dump() == "[false,true,false,false]"); } + /* NOTE: m_line_buffer is not used any more SECTION("issue #495 - fill_line_buffer incorrectly tests m_stream for eof but not fail or bad bits") { SECTION("setting failbit") @@ -943,6 +944,7 @@ TEST_CASE("regression tests") CHECK_THROWS_WITH(l.fill_line_buffer(), "[json.exception.parse_error.111] parse error: bad input stream"); } } + */ SECTION("issue #504 - assertion error (OSS-Fuzz 856)") { diff --git a/test/src/unit-unicode.cpp b/test/src/unit-unicode.cpp index f7cf0ada..805e4c23 100644 --- a/test/src/unit-unicode.cpp +++ b/test/src/unit-unicode.cpp @@ -36,10 +36,11 @@ using nlohmann::json; TEST_CASE("Unicode", "[hide]") { + /* NOTE: to_unicode is not used any more SECTION("full enumeration of Unicode code points") { // lexer to call to_unicode on - json::lexer 
dummy_lexer(reinterpret_cast(""), 0); + json::lexer dummy_lexer("", 0); // create an escaped string from a code point const auto codepoint_to_unicode = [](std::size_t cp) @@ -118,6 +119,7 @@ TEST_CASE("Unicode", "[hide]") CHECK(j3 == j4); } } + */ SECTION("read all unicode characters") { From 99e0d8b339fca017c0f3b7a55d85a087bcaace0a Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 26 Mar 2017 15:29:08 +0200 Subject: [PATCH 02/44] :hammer: fixed \uxxxx parsing --- src/json.hpp | 37 +++++------ test/src/unit-class_parser.cpp | 115 +++++++++++++++++++++------------ 2 files changed, 90 insertions(+), 62 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 3caa8a6c..6cbcdf28 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -42,6 +42,7 @@ SOFTWARE. #include // forward_list #include // function, hash, less #include // initializer_list +#include // hex #include // istream, ostream #include // advance, begin, back_inserter, bidirectional_iterator_tag, distance, end, inserter, iterator, iterator_traits, next, random_access_iterator_tag, reverse_iterator #include // numeric_limits @@ -10542,36 +10543,30 @@ class basic_json // must be called after \u was read; returns following xxxx as hex or -1 when error int get_codepoint() { - // read xxxx of \uxxxx - std::vector buffer(5, '\0'); + // a mapping to discover hex numbers + static int8_t ascii_to_hex[256] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; + + int codepoint = 0; + for (size_t i = 0; i < 4; ++i) { - get(); - if (JSON_UNLIKELY(current != std::char_traits::eof())) + const int8_t digit = ascii_to_hex[static_cast(get())]; + if (JSON_UNLIKELY(digit == -1)) { - buffer[i] = static_cast(current); + return -1; } else { - // error message will be created by caller - return -1; + codepoint += digit; + } + + if (i != 3) + { + codepoint <<= 4; } } - char* endptr; - errno = 0; - - const int codepoint = static_cast(std::strtoul(buffer.data(), &endptr, 16)); - - if (JSON_LIKELY(errno == 0 and endptr == buffer.data() + 4)) - { - return codepoint; - } - else - { - // conversion incomplete or failure - return -1; - } + return codepoint; } token_type scan_string() diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index fe2a8fbe..01812259 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -91,18 +91,18 @@ TEST_CASE("parser class") // error: tab in string CHECK_THROWS_AS(json::parser("\"\t\"").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("\"\t\"").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"'"); // error: newline in string CHECK_THROWS_AS(json::parser("\"\n\"").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("\"\r\"").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("\"\n\"").parse(), - "[json.exception.parse_error.101] parse error 
at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"'"); CHECK_THROWS_WITH(json::parser("\"\r\"").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"'"); // error: backspace in string CHECK_THROWS_AS(json::parser("\"\b\"").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("\"\b\"").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"'"); // improve code coverage CHECK_THROWS_AS(json::parser("\uFF01").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("[-4:1,]").parse(), json::parse_error); @@ -393,34 +393,37 @@ TEST_CASE("parser class") CHECK_THROWS_AS(json::parser("n").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("nu").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("nul").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("n").parse(), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'n'"); + CHECK_THROWS_WITH(json::parser("n").parse(), + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid literal; expected 'null'; last read 'n'"); CHECK_THROWS_WITH(json::parser("nu").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'n'"); + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid literal; expected 'null'; last read 'nu'"); CHECK_THROWS_WITH(json::parser("nul").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'n'"); + 
"[json.exception.parse_error.101] parse error at 4: syntax error - invalid literal; expected 'null'; last read 'nul'"); // unexpected end of true CHECK_THROWS_AS(json::parser("t").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("tr").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("tru").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("t").parse(), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 't'"); + CHECK_THROWS_WITH(json::parser("t").parse(), + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid literal; expected 'true'; last read 't'"); CHECK_THROWS_WITH(json::parser("tr").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 't'"); + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid literal; expected 'true'; last read 'tr'"); CHECK_THROWS_WITH(json::parser("tru").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 't'"); + "[json.exception.parse_error.101] parse error at 4: syntax error - invalid literal; expected 'true'; last read 'tru'"); // unexpected end of false CHECK_THROWS_AS(json::parser("f").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("fa").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("fal").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("fals").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("f").parse(), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'"); + CHECK_THROWS_WITH(json::parser("f").parse(), + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid literal; expected 'false'; last read 'f'"); CHECK_THROWS_WITH(json::parser("fa").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'"); + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid literal; expected 'false'; last read 'fa'"); 
CHECK_THROWS_WITH(json::parser("fal").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'"); + "[json.exception.parse_error.101] parse error at 4: syntax error - invalid literal; expected 'false'; last read 'fal'"); CHECK_THROWS_WITH(json::parser("fals").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'"); + "[json.exception.parse_error.101] parse error at 5: syntax error - invalid literal; expected 'false'; last read 'fals'"); // missing/unexpected end of array CHECK_THROWS_AS(json::parser("[").parse(), json::parse_error); @@ -471,25 +474,25 @@ TEST_CASE("parser class") CHECK_THROWS_AS(json::parser("\"\\u01").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("\"\\u012").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("\"").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: missing closing quote; last read '\"'"); CHECK_THROWS_WITH(json::parser("\"\\\"").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: missing closing quote; last read '\"\\\"'"); CHECK_THROWS_WITH(json::parser("\"\\u\"").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u\"'"); CHECK_THROWS_WITH(json::parser("\"\\u0\"").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 5: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u0\"'"); CHECK_THROWS_WITH(json::parser("\"\\u01\"").parse(), - "[json.exception.parse_error.101] parse 
error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 6: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u01\"'"); CHECK_THROWS_WITH(json::parser("\"\\u012\"").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 7: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u012\"'"); CHECK_THROWS_WITH(json::parser("\"\\u").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u'"); CHECK_THROWS_WITH(json::parser("\"\\u0").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 5: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u0'"); CHECK_THROWS_WITH(json::parser("\"\\u01").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 6: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u01'"); CHECK_THROWS_WITH(json::parser("\"\\u012").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 7: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u012'"); // invalid escapes for (int c = 1; c < 128; ++c) @@ -522,8 +525,12 @@ TEST_CASE("parser class") default: { CHECK_THROWS_AS(json::parser(s.c_str()).parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser(s.c_str()).parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + // only check error message if c is not a 
control character + if (c > 0x1f) + { + CHECK_THROWS_WITH(json::parser(s.c_str()).parse(), + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid string: forbidden character after backspace; last read '\"\\" + std::string(1, c) + "'"); + } break; } } @@ -581,26 +588,52 @@ TEST_CASE("parser class") if (valid(c)) { + CAPTURE(s1); CHECK_NOTHROW(json::parser(s1.c_str()).parse()); + CAPTURE(s2); CHECK_NOTHROW(json::parser(s2.c_str()).parse()); + CAPTURE(s3); CHECK_NOTHROW(json::parser(s3.c_str()).parse()); + CAPTURE(s4); CHECK_NOTHROW(json::parser(s4.c_str()).parse()); } else { + CAPTURE(s1); CHECK_THROWS_AS(json::parser(s1.c_str()).parse(), json::parse_error); - CHECK_THROWS_AS(json::parser(s2.c_str()).parse(), json::parse_error); - CHECK_THROWS_AS(json::parser(s3.c_str()).parse(), json::parse_error); - CHECK_THROWS_AS(json::parser(s4.c_str()).parse(), json::parse_error); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH(json::parser(s1.c_str()).parse(), + "[json.exception.parse_error.101] parse error at 7: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s1.substr(0, 7) + "'"); + } - CHECK_THROWS_WITH(json::parser(s1.c_str()).parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); - CHECK_THROWS_WITH(json::parser(s2.c_str()).parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); - CHECK_THROWS_WITH(json::parser(s3.c_str()).parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); - CHECK_THROWS_WITH(json::parser(s4.c_str()).parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + CAPTURE(s2); + CHECK_THROWS_AS(json::parser(s2.c_str()).parse(), json::parse_error); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH(json::parser(s2.c_str()).parse(), + 
"[json.exception.parse_error.101] parse error at 6: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s2.substr(0, 6) + "'"); + } + + CAPTURE(s3); + CHECK_THROWS_AS(json::parser(s3.c_str()).parse(), json::parse_error); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH(json::parser(s3.c_str()).parse(), + "[json.exception.parse_error.101] parse error at 5: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s3.substr(0, 5) + "'"); + } + + CAPTURE(s4); + CHECK_THROWS_AS(json::parser(s4.c_str()).parse(), json::parse_error); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH(json::parser(s4.c_str()).parse(), + "[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s4.substr(0, 4) + "'"); + } } } } @@ -608,17 +641,17 @@ TEST_CASE("parser class") // missing part of a surrogate pair CHECK_THROWS_AS(json::parse("\"\\uD80C\""), json::parse_error); CHECK_THROWS_WITH(json::parse("\"\\uD80C\""), - "[json.exception.parse_error.102] parse error at 8: missing low surrogate"); + "[json.exception.parse_error.101] parse error at 8: syntax error - invalid string: missing low surrogate; last read '\"\\uD80C\"'"); // invalid surrogate pair CHECK_THROWS_AS(json::parse("\"\\uD80C\\uD80C\""), json::parse_error); CHECK_THROWS_AS(json::parse("\"\\uD80C\\u0000\""), json::parse_error); CHECK_THROWS_AS(json::parse("\"\\uD80C\\uFFFF\""), json::parse_error); CHECK_THROWS_WITH(json::parse("\"\\uD80C\\uD80C\""), - "[json.exception.parse_error.102] parse error at 14: missing or wrong low surrogate"); + "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\uD80C'"); CHECK_THROWS_WITH(json::parse("\"\\uD80C\\u0000\""), - "[json.exception.parse_error.102] parse error at 14: 
missing or wrong low surrogate"); + "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\u0000'"); CHECK_THROWS_WITH(json::parse("\"\\uD80C\\uFFFF\""), - "[json.exception.parse_error.102] parse error at 14: missing or wrong low surrogate"); + "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\uFFFF'"); } SECTION("tests found by mutate++") From 3b069b4b4c135be91a7885d6795ca1e7fcff2eb7 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 26 Mar 2017 16:19:07 +0200 Subject: [PATCH 03/44] :hammer: adjusted more expected exceptions --- test/src/unit-class_parser.cpp | 44 +++++++++++++++++----------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index 01812259..36451042 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -310,21 +310,21 @@ TEST_CASE("parser class") CHECK_THROWS_WITH(json::parser("-01").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected '-01'"); CHECK_THROWS_WITH(json::parser("--1").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '-'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid number; expected digit after '-'; last read '--'"); CHECK_THROWS_WITH(json::parser("1.").parse(), - "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected '.'; expected end of input"); + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected digit after '.'; last read '1.'"); CHECK_THROWS_WITH(json::parser("1E").parse(), - "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected 'E'; expected end of input"); + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected '+', '-', or digit 
after exponent; last read '1E'"); CHECK_THROWS_WITH(json::parser("1E-").parse(), - "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected 'E'; expected end of input"); + "[json.exception.parse_error.101] parse error at 4: syntax error - invalid number; expected digit after exponent sign; last read '1E-'"); CHECK_THROWS_WITH(json::parser("1.E1").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected '.'; expected end of input"); CHECK_THROWS_WITH(json::parser("-1E").parse(), - "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected 'E'; expected end of input"); + "[json.exception.parse_error.101] parse error at 4: syntax error - invalid number; expected '+', '-', or digit after exponent; last read '-1E'"); CHECK_THROWS_WITH(json::parser("-0E#").parse(), - "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected 'E'; expected end of input"); + "[json.exception.parse_error.101] parse error at 4: syntax error - invalid number; expected '+', '-', or digit after exponent; last read '-0E#'"); CHECK_THROWS_WITH(json::parser("-0E-#").parse(), - "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected 'E'; expected end of input"); + "[json.exception.parse_error.101] parse error at 5: syntax error - invalid number; expected digit after exponent sign; last read '-0E-#'"); CHECK_THROWS_WITH(json::parser("-0#").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected '#'; expected end of input"); CHECK_THROWS_WITH(json::parser("-0.0:").parse(), @@ -336,7 +336,7 @@ TEST_CASE("parser class") CHECK_THROWS_WITH(json::parser("-0e0-:").parse(), "[json.exception.parse_error.101] parse error at 5: syntax error - unexpected '-'; expected end of input"); CHECK_THROWS_WITH(json::parser("-0e-:").parse(), - "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected 'e'; expected end of input"); + 
"[json.exception.parse_error.101] parse error at 5: syntax error - invalid number; expected digit after exponent sign; last read '-0e-:'"); CHECK_THROWS_WITH(json::parser("-0f").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected 'f'; expected end of input"); } @@ -361,33 +361,33 @@ TEST_CASE("parser class") CHECK_THROWS_AS(json::parser("1E/").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("1E:").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("0.").parse(), - "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected '.'; expected end of input"); + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected digit after '.'; last read '0.'"); CHECK_THROWS_WITH(json::parser("-").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '-'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid number; expected digit after '-'; last read '-'"); CHECK_THROWS_WITH(json::parser("--").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '-'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid number; expected digit after '-'; last read '--'"); CHECK_THROWS_WITH(json::parser("-0.").parse(), - "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected '.'; expected end of input"); + "[json.exception.parse_error.101] parse error at 4: syntax error - invalid number; expected digit after '.'; last read '-0.'"); CHECK_THROWS_WITH(json::parser("-.").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '-'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid number; expected digit after '-'; last read '-.'"); CHECK_THROWS_WITH(json::parser("-:").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '-'"); + "[json.exception.parse_error.101] 
parse error at 2: syntax error - invalid number; expected digit after '-'; last read '-:'"); CHECK_THROWS_WITH(json::parser("0.:").parse(), - "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected '.'; expected end of input"); + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected digit after '.'; last read '0.:'"); CHECK_THROWS_WITH(json::parser("e.").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'e'"); + "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read 'e'"); CHECK_THROWS_WITH(json::parser("1e.").parse(), - "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected 'e'; expected end of input"); + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected '+', '-', or digit after exponent; last read '1e.'"); CHECK_THROWS_WITH(json::parser("1e/").parse(), - "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected 'e'; expected end of input"); + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected '+', '-', or digit after exponent; last read '1e/'"); CHECK_THROWS_WITH(json::parser("1e:").parse(), - "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected 'e'; expected end of input"); + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected '+', '-', or digit after exponent; last read '1e:'"); CHECK_THROWS_WITH(json::parser("1E.").parse(), - "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected 'E'; expected end of input"); + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected '+', '-', or digit after exponent; last read '1E.'"); CHECK_THROWS_WITH(json::parser("1E/").parse(), - "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected 'E'; expected end of input"); + 
"[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected '+', '-', or digit after exponent; last read '1E/'"); CHECK_THROWS_WITH(json::parser("1E:").parse(), - "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected 'E'; expected end of input"); + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected '+', '-', or digit after exponent; last read '1E:'"); // unexpected end of null CHECK_THROWS_AS(json::parser("n").parse(), json::parse_error); From 06e2a291b1492dbabc6929ed77a63bda7427ced0 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 26 Mar 2017 17:26:41 +0200 Subject: [PATCH 04/44] :hammer: fixed number parsing --- src/json.hpp | 65 +++++++++++++++++++++------------- test/src/unit-class_parser.cpp | 2 +- test/src/unit-regression.cpp | 1 + test/src/unit-testsuites.cpp | 1 + 4 files changed, 43 insertions(+), 26 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 6cbcdf28..e47abab4 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -222,8 +222,8 @@ class parse_error : public exception const size_t byte; private: - parse_error(int id, size_t byte_, const char* what_arg) - : exception(id, what_arg), byte(byte_) + parse_error(int id_, size_t byte_, const char* what_arg) + : exception(id_, what_arg), byte(byte_) {} }; @@ -261,8 +261,8 @@ class invalid_iterator : public exception } private: - invalid_iterator(int id, const char* what_arg) - : exception(id, what_arg) + invalid_iterator(int id_, const char* what_arg) + : exception(id_, what_arg) {} }; @@ -300,8 +300,8 @@ class type_error : public exception } private: - type_error(int id, const char* what_arg) - : exception(id, what_arg) + type_error(int id_, const char* what_arg) + : exception(id_, what_arg) {} }; @@ -331,8 +331,8 @@ class out_of_range : public exception } private: - out_of_range(int id, const char* what_arg) - : exception(id, what_arg) + out_of_range(int id_, const char* what_arg) + : exception(id_, 
what_arg) {} }; @@ -357,8 +357,8 @@ class other_error : public exception } private: - other_error(int id, const char* what_arg) - : exception(id, what_arg) + other_error(int id_, const char* what_arg) + : exception(id_, what_arg) {} }; @@ -10828,24 +10828,33 @@ class basic_json add('\0'); --yylen; - if (has_exp or has_point) + // the conversion + char* endptr = nullptr; + + // try to parse integers first and fall back to floats + if (not has_exp and not has_point) { - value_float = std::strtod(yytext.data(), nullptr); - return token_type::value_float; + errno = 0; + if (has_sign) + { + value_integer = std::strtoll(yytext.data(), &endptr, 10); + if (JSON_LIKELY(errno == 0 and endptr == yytext.data() + yylen)) + { + return token_type::value_integer; + } + } + else + { + value_unsigned = std::strtoull(yytext.data(), &endptr, 10); + if (JSON_LIKELY(errno == 0 and endptr == yytext.data() + yylen)) + { + return token_type::value_unsigned; + } + } } - if (has_sign) - { - char* endptr = nullptr; - value_integer = std::strtoll(yytext.data(), &endptr, 10); - return token_type::value_integer; - } - else - { - char* endptr = nullptr; - value_unsigned = std::strtoull(yytext.data(), &endptr, 10); - return token_type::value_unsigned; - } + value_float = std::strtod(yytext.data(), nullptr); + return token_type::value_float; } token_type scan_true() @@ -10986,6 +10995,12 @@ class basic_json case lexer::token_type::value_float: { + // throw in case of infinity or NAN + if (not std::isfinite(value_float)) + { + JSON_THROW(out_of_range::create(406, "number overflow parsing '" + get_token_string() + "'")); + } + result.m_type = value_t::number_float; result.m_value = static_cast(value_float); return true; diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index 36451042..3e5b2871 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -529,7 +529,7 @@ TEST_CASE("parser class") if (c > 0x1f) { 
CHECK_THROWS_WITH(json::parser(s.c_str()).parse(), - "[json.exception.parse_error.101] parse error at 3: syntax error - invalid string: forbidden character after backspace; last read '\"\\" + std::string(1, c) + "'"); + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid string: forbidden character after backspace; last read '\"\\" + std::string(1, static_cast(c)) + "'"); } break; } diff --git a/test/src/unit-regression.cpp b/test/src/unit-regression.cpp index 2c09abaa..df685d2c 100644 --- a/test/src/unit-regression.cpp +++ b/test/src/unit-regression.cpp @@ -216,6 +216,7 @@ TEST_CASE("regression tests") { json a = {1, 2, 3}; json::reverse_iterator rit = ++a.rbegin(); + CHECK(*rit == json(2)); } { json a = {1, 2, 3}; diff --git a/test/src/unit-testsuites.cpp b/test/src/unit-testsuites.cpp index 8d6a8162..c46a4ffa 100644 --- a/test/src/unit-testsuites.cpp +++ b/test/src/unit-testsuites.cpp @@ -305,6 +305,7 @@ TEST_CASE("compliance tests from nativejson-benchmark") std::string json_string( (std::istreambuf_iterator(f) ), (std::istreambuf_iterator()) ); + CAPTURE(json_string); json j = json::parse(json_string); CHECK(j.dump() == json_string); } From 342552797093683ec76316b8d588056720a0813d Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 26 Mar 2017 18:12:58 +0200 Subject: [PATCH 05/44] :hammer: added code for locale-independent number parsing --- errors.txt | 7 ------- src/json.hpp | 25 +++++++++++++++++++++---- 2 files changed, 21 insertions(+), 11 deletions(-) delete mode 100644 errors.txt diff --git a/errors.txt b/errors.txt deleted file mode 100644 index d3a6c6db..00000000 --- a/errors.txt +++ /dev/null @@ -1,7 +0,0 @@ -- test/test-class_parser - - 617 failed -- test/test-regression - - 11 failed -- test/test-testsuites - - 43 failed - diff --git a/src/json.hpp b/src/json.hpp index e47abab4..2ab32885 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -10517,12 +10517,13 @@ class basic_json } explicit lexer(std::istream& i) - // : ia(new 
input_stream_adapter(i)) - : ia(new cached_input_stream_adapter(i)) + : ia(new cached_input_stream_adapter(i)), + decimal_point_char(get_decimal_point()) {} lexer(const char* buff, const size_t len) - : ia(new input_buffer_adapter(buff, len)) + : ia(new input_buffer_adapter(buff, len)), + decimal_point_char(get_decimal_point()) {} ~lexer() @@ -10536,6 +10537,18 @@ class basic_json lexer operator=(const lexer&) = delete; private: + ///////////////////// + // locales + ///////////////////// + + /// return the locale-dependent decimal point + static char get_decimal_point() noexcept + { + const auto loc = localeconv(); + assert(loc != nullptr); + return (loc->decimal_point == nullptr) ? '.' : loc->decimal_point[0]; + } + ///////////////////// // scan functions ///////////////////// @@ -10815,7 +10828,8 @@ class basic_json return token_type::parse_error; } - add(current); + // add current character and fix decimal point + add((state == 4) ? decimal_point_char : current); get(); old_state = state; state = lookup[state][static_cast(current)]; @@ -11103,6 +11117,9 @@ class basic_json long long value_integer = 0; unsigned long long value_unsigned = 0; double value_float = 0; + + // the decimal point + const char decimal_point_char = '\0'; }; /*! 
From f75e195db3a057295c296b8aae66099fdf462d11 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 26 Mar 2017 18:45:04 +0200 Subject: [PATCH 06/44] :hammer: added code for user-defined number types --- src/json.hpp | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 2ab32885..22f65979 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -10771,6 +10771,23 @@ class basic_json } } + // overloaded wrappers for strtod/strtof/strtold + // that will be called from parse + static void strtof(float& f, const char* str, char** endptr) noexcept + { + f = std::strtof(str, endptr); + } + + static void strtof(double& f, const char* str, char** endptr) noexcept + { + f = std::strtod(str, endptr); + } + + static void strtof(long double& f, const char* str, char** endptr) noexcept + { + f = std::strtold(str, endptr); + } + token_type scan_number() { static unsigned char lookup[9][256] = @@ -10851,23 +10868,25 @@ class basic_json errno = 0; if (has_sign) { - value_integer = std::strtoll(yytext.data(), &endptr, 10); - if (JSON_LIKELY(errno == 0 and endptr == yytext.data() + yylen)) + const auto x = std::strtoll(yytext.data(), &endptr, 10); + value_integer = static_cast(x); + if (JSON_LIKELY(errno == 0 and endptr == yytext.data() + yylen and value_integer == x)) { return token_type::value_integer; } } else { - value_unsigned = std::strtoull(yytext.data(), &endptr, 10); - if (JSON_LIKELY(errno == 0 and endptr == yytext.data() + yylen)) + const auto x = std::strtoull(yytext.data(), &endptr, 10); + value_unsigned = static_cast(x); + if (JSON_LIKELY(errno == 0 and endptr == yytext.data() + yylen and value_unsigned == x)) { return token_type::value_unsigned; } } } - value_float = std::strtod(yytext.data(), nullptr); + strtof(value_float, yytext.data(), nullptr); return token_type::value_float; } @@ -10996,14 +11015,14 @@ class basic_json case lexer::token_type::value_unsigned: { result.m_type = 
value_t::number_unsigned; - result.m_value = static_cast(value_unsigned); + result.m_value = value_unsigned; return true; } case lexer::token_type::value_integer: { result.m_type = value_t::number_integer; - result.m_value = static_cast(value_integer); + result.m_value = value_integer; return true; } @@ -11016,7 +11035,7 @@ class basic_json } result.m_type = value_t::number_float; - result.m_value = static_cast(value_float); + result.m_value = value_float; return true; } @@ -11114,9 +11133,9 @@ class basic_json std::string error_message = ""; // number values - long long value_integer = 0; - unsigned long long value_unsigned = 0; - double value_float = 0; + number_integer_t value_integer = 0; + number_unsigned_t value_unsigned = 0; + number_float_t value_float = 0; // the decimal point const char decimal_point_char = '\0'; From b0c47abd889d47c89deeeb8632b2fd194b6fe9b3 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 27 Mar 2017 22:58:44 +0200 Subject: [PATCH 07/44] :hammer: rewrote string parser to cope with UTF-8 --- src/json.hpp | 281 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 264 insertions(+), 17 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 22f65979..9dd50447 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -10587,8 +10587,20 @@ class basic_json // reset yytext (ignore opening quote) reset(); + // we entered the function by reading an open quote + assert (current == '\"'); + + static unsigned char next[256] = {17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 2, 2, 6, 3, 3, 3, 7, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18}; + + // state variable + int state = -1; + + // whether the state is already set + bool state_set = false; + while (true) { + // get next character get(); // end of file while parsing string @@ -10598,25 +10610,240 @@ class basic_json return token_type::parse_error; } - // control character - if (JSON_UNLIKELY('\x00' <= current and current <= '\x1f')) - { - error_message = "invalid string: control characters (U+0000 through U+001f) must be escaped"; - return token_type::parse_error; - } + // after coping with EOF, we only cope with bytes + //assert(0 <= current and current <= 255); + unsigned char ch = static_cast(current); - switch (current) + // get next state + state = state_set ? 
state : next[ch]; + // reset variable + state_set = false; + + // 'add': 0, + // 'add_check1': 1, + // 'add_check2': 2, + // 'add_check3': 3, + // 'add_check_e0': 4, + // 'add_check_ed': 5, + // 'add_check_f0': 6, + // 'add_check_f4': 7, + // 'check1': 8, + // 'check2': 9, + // 'check3': 10, + // 'check_e0': 11, + // 'check_ed': 12, + // 'check_f0': 13, + // 'check_f4': 14, + // 'escape': 15, + // 'end': 16, + // 'error_invalid': 17, + // 'error_utf8': 18 + assert(0 <= state and state <= 18); + + switch (state) { - // closing quote - case '\"': + // add + case 0: { - add('\0'); - --yylen; - return token_type::value_string; + add(current); + break; } - // escape sequence - case '\\': + // add_check1 + case 1: + { + add(current); + // next state is check1 + state = 8; + state_set = true; + break; + } + + // add_check2 + case 2: + { + add(current); + // next state is check2 + state = 9; + state_set = true; + break; + } + + // add_check3 + case 3: + { + add(current); + // next state is check3 + state = 10; + state_set = true; + break; + } + + // add_check_e0 + case 4: + { + add(current); + // next state is check_e0 + state = 11; + state_set = true; + break; + } + + // add_check_ed + case 5: + { + add(current); + // next state is check_ed + state = 12; + state_set = true; + break; + } + + // add_check_f0 + case 6: + { + add(current); + // next state is check_f0 + state = 13; + state_set = true; + break; + } + + // add_check_f4 + case 7: + { + add(current); + // next state is check_f4 + state = 14; + state_set = true; + break; + } + + // check1 + case 8: + { + if (JSON_LIKELY(0x80 <= ch and ch <= 0xBF)) + { + add(current); + break; + } + else + { + error_message = "invalid string: not well-formed UTF-8 byte"; + return token_type::parse_error; + } + } + + // check2 + case 9: + { + if (JSON_LIKELY(0x80 <= ch and ch <= 0xBF)) + { + add(current); + // next state is check1 + state = 8; + state_set = true; + break; + } + else + { + error_message = "invalid string: not 
well-formed UTF-8 byte"; + return token_type::parse_error; + } + } + + // check3 + case 10: + { + if (JSON_LIKELY(0x80 <= ch and ch <= 0xBF)) + { + add(current); + // next state is check2 + state = 9; + state_set = true; + break; + } + else + { + error_message = "invalid string: not well-formed UTF-8 byte"; + return token_type::parse_error; + } + } + + // check_e0 + case 11: + { + if (JSON_LIKELY(0xA0 <= ch and ch <= 0xBF)) + { + add(current); + // next state is check1 + state = 8; + state_set = true; + break; + } + else + { + error_message = "invalid string: not well-formed UTF-8 byte"; + return token_type::parse_error; + } + } + + // check_ed + case 12: + { + if (JSON_LIKELY(0x80 <= ch and ch <= 0x9F)) + { + add(current); + // next state is check1 + state = 8; + state_set = true; + break; + } + else + { + error_message = "invalid string: not well-formed UTF-8 byte"; + return token_type::parse_error; + } + } + + // check_f0 + case 13: + { + if (JSON_LIKELY(0x90 <= ch and ch <= 0xBF)) + { + add(current); + // next state is check2 + state = 9; + state_set = true; + break; + } + else + { + error_message = "invalid string: not well-formed UTF-8 byte"; + return token_type::parse_error; + } + } + + // check_f4 + case 14: + { + if (JSON_LIKELY(0x80 <= ch and ch <= 0x8F)) + { + add(current); + // next state is check2 + state = 9; + state_set = true; + break; + } + else + { + error_message = "invalid string: not well-formed UTF-8 byte"; + return token_type::parse_error; + } + } + + // escape + case 15: { switch (get()) { @@ -10761,11 +10988,31 @@ class basic_json break; } - // any other character + // end + case 16: + { + add('\0'); + --yylen; + return token_type::value_string; + } + + // error_invalid + case 17: + { + error_message = "invalid string: control characters (U+0000 through U+001f) must be escaped"; + return token_type::parse_error; + } + + // error_utf8 + case 18: + { + error_message = "invalid string: not well-formed UTF-8 byte"; + return 
token_type::parse_error; + } + default: { - add(current); - break; + assert(false); } } } From bbb6bd9de560a300aa9fd6d19e07876c687a7868 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 28 Mar 2017 17:00:03 +0200 Subject: [PATCH 08/44] :hammer: fixing last failing test cases --- src/json.hpp | 27 +++++++++++++--------- test/src/unit-class_parser.cpp | 14 ++++++------ test/src/unit-testsuites.cpp | 41 ++++++++++++---------------------- 3 files changed, 37 insertions(+), 45 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 9dd50447..cbd630e4 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -7179,7 +7179,7 @@ class basic_json static basic_json parse(const CharT s, const parser_callback_t cb = nullptr) { - return parser(reinterpret_cast(s), cb).parse(); + return parser(reinterpret_cast(s), cb).parse(true); } /*! @@ -7214,7 +7214,7 @@ class basic_json static basic_json parse(std::istream& i, const parser_callback_t cb = nullptr) { - return parser(i, cb).parse(); + return parser(i, cb).parse(true); } /*! @@ -7223,7 +7223,7 @@ class basic_json static basic_json parse(std::istream&& i, const parser_callback_t cb = nullptr) { - return parser(i, cb).parse(); + return parser(i, cb).parse(true); } /*! @@ -7295,10 +7295,10 @@ class basic_json // to generate "unexpected EOF" error message if (std::distance(first, last) <= 0) { - return parser("").parse(); + return parser("").parse(true); } - return parser(first, last, cb).parse(); + return parser(first, last, cb).parse(true); } /*! 
@@ -7386,7 +7386,7 @@ class basic_json */ friend std::istream& operator<<(basic_json& j, std::istream& i) { - j = parser(i).parse(); + j = parser(i).parse(false); return i; } @@ -7396,7 +7396,7 @@ class basic_json */ friend std::istream& operator>>(std::istream& i, basic_json& j) { - j = parser(i).parse(); + j = parser(i).parse(false); return i; } @@ -11043,7 +11043,7 @@ class basic_json {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10}, {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}, {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}, - {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10}, + {10, 10, 10, 10, 10, 10, 10, 10, 
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10}, {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 8, 10, 8, 10, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10}, 
{9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}, {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}, @@ -11423,11 +11423,14 @@ class basic_json /*! 
@brief public parser interface + + @param[in] strict whether to expect the last token to be EOF + @throw parse_error.101 in case of an unexpected token @throw parse_error.102 if to_unicode fails or surrogate error @throw parse_error.103 if to_unicode fails */ - basic_json parse() + basic_json parse(const bool strict = true) { // read first token get_token(); @@ -11435,8 +11438,10 @@ class basic_json basic_json result = parse_internal(true); result.assert_invariant(); - // FIXME: this is new behavior - //expect(lexer::token_type::end_of_input); + if (strict) + { + expect(lexer::token_type::end_of_input); + } // return parser result and replace it with null in case the // top-level value was discarded by the callback function diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index 3e5b2871..f36eb900 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -306,9 +306,9 @@ TEST_CASE("parser class") CHECK_THROWS_AS(json::parser("+0").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("01").parse(), - "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected '01'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected number literal; expected end of input"); CHECK_THROWS_WITH(json::parser("-01").parse(), - "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected '-01'"); + "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected number literal; expected end of input"); CHECK_THROWS_WITH(json::parser("--1").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid number; expected digit after '-'; last read '--'"); CHECK_THROWS_WITH(json::parser("1.").parse(), @@ -318,7 +318,7 @@ TEST_CASE("parser class") CHECK_THROWS_WITH(json::parser("1E-").parse(), "[json.exception.parse_error.101] parse error at 4: syntax error - invalid number; expected digit after exponent sign; last read '1E-'"); 
CHECK_THROWS_WITH(json::parser("1.E1").parse(), - "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected '.'; expected end of input"); + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected digit after '.'; last read '1.E'"); CHECK_THROWS_WITH(json::parser("-1E").parse(), "[json.exception.parse_error.101] parse error at 4: syntax error - invalid number; expected '+', '-', or digit after exponent; last read '-1E'"); CHECK_THROWS_WITH(json::parser("-0E#").parse(), @@ -326,19 +326,19 @@ TEST_CASE("parser class") CHECK_THROWS_WITH(json::parser("-0E-#").parse(), "[json.exception.parse_error.101] parse error at 5: syntax error - invalid number; expected digit after exponent sign; last read '-0E-#'"); CHECK_THROWS_WITH(json::parser("-0#").parse(), - "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected '#'; expected end of input"); + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid literal; last read: '-0#'; expected end of input"); CHECK_THROWS_WITH(json::parser("-0.0:").parse(), "[json.exception.parse_error.101] parse error at 5: syntax error - unexpected ':'; expected end of input"); CHECK_THROWS_WITH(json::parser("-0.0Z").parse(), - "[json.exception.parse_error.101] parse error at 5: syntax error - unexpected 'Z'; expected end of input"); + "[json.exception.parse_error.101] parse error at 5: syntax error - invalid literal; last read: '-0.0Z'; expected end of input"); CHECK_THROWS_WITH(json::parser("-0E123:").parse(), "[json.exception.parse_error.101] parse error at 7: syntax error - unexpected ':'; expected end of input"); CHECK_THROWS_WITH(json::parser("-0e0-:").parse(), - "[json.exception.parse_error.101] parse error at 5: syntax error - unexpected '-'; expected end of input"); + "[json.exception.parse_error.101] parse error at 6: syntax error - invalid number; expected digit after '-'; last read: '-:'; expected end of input"); 
CHECK_THROWS_WITH(json::parser("-0e-:").parse(), "[json.exception.parse_error.101] parse error at 5: syntax error - invalid number; expected digit after exponent sign; last read '-0e-:'"); CHECK_THROWS_WITH(json::parser("-0f").parse(), - "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected 'f'; expected end of input"); + "[json.exception.parse_error.101] parse error at 4: syntax error - invalid literal; expected 'false'; last read: '-0f'; expected end of input"); } } } diff --git a/test/src/unit-testsuites.cpp b/test/src/unit-testsuites.cpp index c46a4ffa..b0c721d2 100644 --- a/test/src/unit-testsuites.cpp +++ b/test/src/unit-testsuites.cpp @@ -77,9 +77,8 @@ TEST_CASE("compliance tests from json.org") }) { CAPTURE(filename); - json j; std::ifstream f(filename); - CHECK_THROWS_AS(j << f, json::parse_error); + CHECK_THROWS_AS(json::parse(f), json::parse_error); } } @@ -93,9 +92,8 @@ TEST_CASE("compliance tests from json.org") }) { CAPTURE(filename); - json j; std::ifstream f(filename); - CHECK_NOTHROW(j << f); + CHECK_NOTHROW(json::parse(f)); } } } @@ -320,7 +318,7 @@ TEST_CASE("test suite from json-test-suite") // strings in a JSON array std::ifstream f("test/data/json_testsuite/sample.json"); json j; - CHECK_NOTHROW(j << f); + CHECK_NOTHROW(j = json::parse(f)); // the array has 3 elements CHECK(j.size() == 3); @@ -334,36 +332,31 @@ TEST_CASE("json.org examples") SECTION("1.json") { std::ifstream f("test/data/json.org/1.json"); - json j; - CHECK_NOTHROW(j << f); + CHECK_NOTHROW(json::parse(f)); } SECTION("2.json") { std::ifstream f("test/data/json.org/2.json"); - json j; - CHECK_NOTHROW(j << f); + CHECK_NOTHROW(json::parse(f)); } SECTION("3.json") { std::ifstream f("test/data/json.org/3.json"); - json j; - CHECK_NOTHROW(j << f); + CHECK_NOTHROW(json::parse(f)); } SECTION("4.json") { std::ifstream f("test/data/json.org/4.json"); - json j; - CHECK_NOTHROW(j << f); + CHECK_NOTHROW(json::parse(f)); } SECTION("5.json") { std::ifstream 
f("test/data/json.org/5.json"); - json j; - CHECK_NOTHROW(j << f); + CHECK_NOTHROW(json::parse(f)); } } @@ -545,8 +538,7 @@ TEST_CASE("nst's JSONTestSuite") { CAPTURE(filename); std::ifstream f(filename); - json j; - CHECK_NOTHROW(j << f); + CHECK_NOTHROW(json::parse(f)); } } @@ -754,8 +746,7 @@ TEST_CASE("nst's JSONTestSuite") { CAPTURE(filename); std::ifstream f(filename); - json j; - CHECK_THROWS_AS(j << f, json::parse_error); + CHECK_THROWS_AS(json::parse(f), json::parse_error); } } @@ -777,8 +768,7 @@ TEST_CASE("nst's JSONTestSuite") { CAPTURE(filename); std::ifstream f(filename); - json j; - CHECK_NOTHROW(j << f); + CHECK_NOTHROW(json::parse(f)); } } @@ -797,8 +787,7 @@ TEST_CASE("nst's JSONTestSuite") { CAPTURE(filename); std::ifstream f(filename); - json j; - CHECK_THROWS_AS(j << f, json::out_of_range); + CHECK_THROWS_AS(json::parse(f), json::out_of_range); } } @@ -824,8 +813,7 @@ TEST_CASE("nst's JSONTestSuite") { CAPTURE(filename); std::ifstream f(filename); - json j; - CHECK_THROWS_AS(j << f, json::parse_error); + CHECK_THROWS_AS(json::parse(f), json::parse_error); } } } @@ -851,8 +839,7 @@ TEST_CASE("Big List of Naughty Strings") SECTION("parsing blns.json") { std::ifstream f("test/data/big-list-of-naughty-strings/blns.json"); - json j; - CHECK_NOTHROW(j << f); + CHECK_NOTHROW(json::parse(f)); } // check if parsed strings roundtrip From 1e495945f1f20dcb986e27b4f55f1350fb2d10c6 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 28 Mar 2017 18:06:43 +0200 Subject: [PATCH 09/44] :hammer: fixed buffer overflow --- src/json.hpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index cbd630e4..bdb3861a 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -8575,7 +8575,7 @@ class basic_json case 0x7f: // UTF-8 string (indefinite length) { std::string result; - while (check_length(v.size(), 1, idx), v[idx] != 0xff) + while (static_cast(check_length(v.size(), 1, idx)), v[idx] != 0xff) { string_t s = 
from_cbor_internal(v, idx); result += s; @@ -8671,7 +8671,7 @@ class basic_json case 0x9f: // array (indefinite length) { basic_json result = value_t::array; - while (check_length(v.size(), 1, idx), v[idx] != 0xff) + while (static_cast(check_length(v.size(), 1, idx)), v[idx] != 0xff) { result.push_back(from_cbor_internal(v, idx)); } @@ -8776,7 +8776,7 @@ class basic_json case 0xbf: // map (indefinite length) { basic_json result = value_t::object; - while (check_length(v.size(), 1, idx), v[idx] != 0xff) + while (static_cast(check_length(v.size(), 1, idx)), v[idx] != 0xff) { cbor_expect_string(v, idx); std::string key = from_cbor_internal(v, idx); @@ -10442,7 +10442,9 @@ class basic_json std::string read(size_t offset, size_t length) override { - return std::string(start + offset, length); + // avoid reading too many characters + const size_t max_length = static_cast(limit-start); + return std::string(start + offset, std::min({length, max_length})); } private: From 6a6fbea62c61fb0416cbc891100bc4acd976d95e Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 28 Mar 2017 20:08:08 +0200 Subject: [PATCH 10/44] :hammer: some cleanup --- src/json.hpp | 117 ++++++++++++++++++++++++++------------------------- 1 file changed, 59 insertions(+), 58 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index bdb3861a..48607bb8 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -10443,7 +10443,7 @@ class basic_json std::string read(size_t offset, size_t length) override { // avoid reading too many characters - const size_t max_length = static_cast(limit-start); + const size_t max_length = static_cast(limit - start); return std::string(start + offset, std::min({length, max_length})); } @@ -10462,9 +10462,9 @@ class basic_json literal_false, ///< the `false` literal literal_null, ///< the `null` literal value_string, ///< a string -- use get_string() for actual value - value_unsigned, ///< an unsigned integer -- use get_number() for actual value - value_integer, ///< a signed 
integer -- use get_number() for actual value - value_float, ///< an floating point number -- use get_number() for actual value + value_unsigned, ///< an unsigned integer -- use get_number_unsigned() for actual value + value_integer, ///< a signed integer -- use get_number_integer() for actual value + value_float, ///< an floating point number -- use get_number_float() for actual value begin_array, ///< the character for array begin `[` begin_object, ///< the character for object begin `{` end_array, ///< the character for array end `]` @@ -10476,7 +10476,7 @@ class basic_json }; /// return name of values of type token_type (only used for errors) - static std::string token_type_name(const token_type t) + static const char* token_type_name(const token_type t) noexcept { switch (t) { @@ -10563,6 +10563,7 @@ class basic_json int codepoint = 0; + // check the next 4 bytes for (size_t i = 0; i < 4; ++i) { const int8_t digit = ascii_to_hex[static_cast(get())]; @@ -10575,6 +10576,7 @@ class basic_json codepoint += digit; } + // except the last byte, result must be multiplied by 16 if (i != 3) { codepoint <<= 4; @@ -10895,7 +10897,7 @@ class basic_json } // check if code point is a high surrogate - if (codepoint1 >= 0xD800 and codepoint1 <= 0xDBFF) + if (0xD800 <= codepoint1 and codepoint1 <= 0xDBFF) { // expect next \uxxxx entry if (JSON_LIKELY(get() == '\\' and get() == 'u')) @@ -10909,7 +10911,7 @@ class basic_json } // check if codepoint2 is a low surrogate - if (codepoint2 >= 0xDC00 and codepoint2 <= 0xDFFF) + if (JSON_LIKELY(0xDC00 <= codepoint2 and codepoint2 <= 0xDFFF)) { codepoint = // high surrogate occupies the most significant 22 bits @@ -10935,7 +10937,7 @@ class basic_json } else { - if (JSON_UNLIKELY(codepoint1 >= 0xDC00 and codepoint1 <= 0xDFFF)) + if (JSON_UNLIKELY(0xDC00 <= codepoint1 and codepoint1 <= 0xDFFF)) { error_message = "invalid string: missing high surrogate"; return token_type::parse_error; @@ -10993,6 +10995,7 @@ class basic_json // end case 
16: { + // terminate yytext add('\0'); --yylen; return token_type::value_string; @@ -11108,15 +11111,13 @@ class basic_json add('\0'); --yylen; - // the conversion - char* endptr = nullptr; - // try to parse integers first and fall back to floats if (not has_exp and not has_point) { errno = 0; if (has_sign) { + char* endptr = nullptr; const auto x = std::strtoll(yytext.data(), &endptr, 10); value_integer = static_cast(x); if (JSON_LIKELY(errno == 0 and endptr == yytext.data() + yylen and value_integer == x)) @@ -11126,6 +11127,7 @@ class basic_json } else { + char* endptr = nullptr; const auto x = std::strtoull(yytext.data(), &endptr, 10); value_unsigned = static_cast(x); if (JSON_LIKELY(errno == 0 and endptr == yytext.data() + yylen and value_unsigned == x)) @@ -11218,11 +11220,26 @@ class basic_json } public: - constexpr size_t get_position() const + constexpr size_t get_position() const noexcept { return chars_read; } + constexpr number_integer_t get_number_integer() const noexcept + { + return value_integer; + } + + constexpr number_unsigned_t get_number_unsigned() const noexcept + { + return value_unsigned; + } + + constexpr number_float_t get_number_float() const noexcept + { + return value_float; + } + const std::string get_string() { return std::string(yytext.data(), yylen); @@ -11252,49 +11269,11 @@ class basic_json return ss.str(); } - const std::string& get_error_message() const + const std::string& get_error_message() const noexcept { return error_message; } - bool get_number(basic_json& result, const token_type token) const - { - switch (token) - { - case lexer::token_type::value_unsigned: - { - result.m_type = value_t::number_unsigned; - result.m_value = value_unsigned; - return true; - } - - case lexer::token_type::value_integer: - { - result.m_type = value_t::number_integer; - result.m_value = value_integer; - return true; - } - - case lexer::token_type::value_float: - { - // throw in case of infinity or NAN - if (not std::isfinite(value_float)) - { 
- JSON_THROW(out_of_range::create(406, "number overflow parsing '" + get_token_string() + "'")); - } - - result.m_type = value_t::number_float; - result.m_value = value_float; - return true; - } - - default: - { - return false; - } - } - } - token_type scan() { // read next character and ignore whitespace @@ -11602,8 +11581,8 @@ class basic_json case lexer::token_type::literal_null: { - get_token(); result.m_type = value_t::null; + get_token(); break; } @@ -11616,25 +11595,47 @@ class basic_json case lexer::token_type::literal_true: { - get_token(); result.m_type = value_t::boolean; result.m_value = true; + get_token(); break; } case lexer::token_type::literal_false: { - get_token(); result.m_type = value_t::boolean; result.m_value = false; + get_token(); break; } case lexer::token_type::value_unsigned: + { + result.m_type = value_t::number_unsigned; + result.m_value = m_lexer.get_number_unsigned(); + get_token(); + break; + } + case lexer::token_type::value_integer: + { + result.m_type = value_t::number_integer; + result.m_value = m_lexer.get_number_integer(); + get_token(); + break; + } + case lexer::token_type::value_float: { - m_lexer.get_number(result, last_token); + result.m_type = value_t::number_float; + result.m_value = m_lexer.get_number_float(); + + // throw in case of infinity or NAN + if (JSON_UNLIKELY(not std::isfinite(result.m_value.number_float))) + { + JSON_THROW(out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'")); + } + get_token(); break; } @@ -11674,10 +11675,10 @@ class basic_json } else { - error_msg += "unexpected " + lexer::token_type_name(last_token); + error_msg += "unexpected " + std::string(lexer::token_type_name(last_token)); } - error_msg += "; expected " + lexer::token_type_name(t); + error_msg += "; expected " + std::string(lexer::token_type_name(t)); JSON_THROW(parse_error::create(101, m_lexer.get_position(), error_msg)); } } @@ -11696,7 +11697,7 @@ class basic_json } else { - error_msg += 
"unexpected " + lexer::token_type_name(last_token); + error_msg += "unexpected " + std::string(lexer::token_type_name(last_token)); } JSON_THROW(parse_error::create(101, m_lexer.get_position(), error_msg)); From 50e251f5f644ca9fd2d2fd58a62f0eff2f31ab18 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 28 Mar 2017 20:17:00 +0200 Subject: [PATCH 11/44] :hammer: adjusted test case to new signature of token_type_name() --- test/src/unit-class_lexer.cpp | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/test/src/unit-class_lexer.cpp b/test/src/unit-class_lexer.cpp index d4bf2dd2..214cae75 100644 --- a/test/src/unit-class_lexer.cpp +++ b/test/src/unit-class_lexer.cpp @@ -87,22 +87,22 @@ TEST_CASE("lexer class") SECTION("token_type_name") { - CHECK((json::lexer::token_type_name(json::lexer::token_type::uninitialized) == "")); - CHECK((json::lexer::token_type_name(json::lexer::token_type::literal_true) == "true literal")); - CHECK((json::lexer::token_type_name(json::lexer::token_type::literal_false) == "false literal")); - CHECK((json::lexer::token_type_name(json::lexer::token_type::literal_null) == "null literal")); - CHECK((json::lexer::token_type_name(json::lexer::token_type::value_string) == "string literal")); - CHECK((json::lexer::token_type_name(json::lexer::token_type::value_unsigned) == "number literal")); - CHECK((json::lexer::token_type_name(json::lexer::token_type::value_integer) == "number literal")); - CHECK((json::lexer::token_type_name(json::lexer::token_type::value_float) == "number literal")); - CHECK((json::lexer::token_type_name(json::lexer::token_type::begin_array) == "'['")); - CHECK((json::lexer::token_type_name(json::lexer::token_type::begin_object) == "'{'")); - CHECK((json::lexer::token_type_name(json::lexer::token_type::end_array) == "']'")); - CHECK((json::lexer::token_type_name(json::lexer::token_type::end_object) == "'}'")); - 
CHECK((json::lexer::token_type_name(json::lexer::token_type::name_separator) == "':'")); - CHECK((json::lexer::token_type_name(json::lexer::token_type::value_separator) == "','")); - CHECK((json::lexer::token_type_name(json::lexer::token_type::parse_error) == "")); - CHECK((json::lexer::token_type_name(json::lexer::token_type::end_of_input) == "end of input")); + CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::uninitialized)) == "")); + CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::literal_true)) == "true literal")); + CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::literal_false)) == "false literal")); + CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::literal_null)) == "null literal")); + CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::value_string)) == "string literal")); + CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::value_unsigned)) == "number literal")); + CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::value_integer)) == "number literal")); + CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::value_float)) == "number literal")); + CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::begin_array)) == "'['")); + CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::begin_object)) == "'{'")); + CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::end_array)) == "']'")); + CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::end_object)) == "'}'")); + CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::name_separator)) == "':'")); + CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::value_separator)) == "','")); + CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::parse_error)) == "")); + 
CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::end_of_input)) == "end of input")); } SECTION("parse errors on first character") From d37ca2eba8363a294469b066dc137236c8a13802 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 28 Mar 2017 20:29:27 +0200 Subject: [PATCH 12/44] :hammer: fixed test case --- test/src/unit-class_lexer.cpp | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/test/src/unit-class_lexer.cpp b/test/src/unit-class_lexer.cpp index 214cae75..468e1f52 100644 --- a/test/src/unit-class_lexer.cpp +++ b/test/src/unit-class_lexer.cpp @@ -87,22 +87,22 @@ TEST_CASE("lexer class") SECTION("token_type_name") { - CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::uninitialized)) == "")); - CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::literal_true)) == "true literal")); - CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::literal_false)) == "false literal")); - CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::literal_null)) == "null literal")); - CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::value_string)) == "string literal")); - CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::value_unsigned)) == "number literal")); - CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::value_integer)) == "number literal")); - CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::value_float)) == "number literal")); - CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::begin_array)) == "'['")); - CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::begin_object)) == "'{'")); - CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::end_array)) == "']'")); - CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::end_object)) == "'}'")); - 
CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::name_separator)) == "':'")); - CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::value_separator)) == "','")); - CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::parse_error)) == "")); - CHECK((json::lexer::token_type_name(std::string(json::lexer::token_type::end_of_input)) == "end of input")); + CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::uninitialized)) == "")); + CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::literal_true)) == "true literal")); + CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::literal_false)) == "false literal")); + CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::literal_null)) == "null literal")); + CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_string)) == "string literal")); + CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_unsigned)) == "number literal")); + CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_integer)) == "number literal")); + CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_float)) == "number literal")); + CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::begin_array)) == "'['")); + CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::begin_object)) == "'{'")); + CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::end_array)) == "']'")); + CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::end_object)) == "'}'")); + CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::name_separator)) == "':'")); + CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_separator)) == "','")); + 
CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::parse_error)) == "")); + CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::end_of_input)) == "end of input")); } SECTION("parse errors on first character") From 6b12e4047863dfb2060f92c42bd7ac8960dfd672 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 28 Mar 2017 21:55:26 +0200 Subject: [PATCH 13/44] :checkered_flag: removed __builtin_expect for MSVC --- src/json.hpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 48607bb8..c10c4037 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -91,8 +91,13 @@ SOFTWARE. #endif // manual branch prediction -#define JSON_LIKELY(x) __builtin_expect(!!(x), 1) -#define JSON_UNLIKELY(x) __builtin_expect(!!(x), 0) +#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) + #define JSON_LIKELY(x) __builtin_expect(!!(x), 1) + #define JSON_UNLIKELY(x) __builtin_expect(!!(x), 0) +#else + #define JSON_LIKELY(x) x + #define JSON_UNLIKELY(x) x +#endif /*! 
@brief namespace for Niels Lohmann From 19d119e18c3ff13fc85c822c52c78399f9256617 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 28 Mar 2017 22:10:24 +0200 Subject: [PATCH 14/44] :hammer: fixed a warning in MSVC --- src/json.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/json.hpp b/src/json.hpp index c10c4037..cb52a6ef 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -10323,7 +10323,7 @@ class basic_json // refill is.read(reinterpret_cast(buffer.data()), static_cast(buffer.size())); // set unfilled characters to EOF - std::fill_n(buffer.begin() + is.gcount(), + std::fill_n(buffer.begin() + static_cast(is.gcount()), buffer.size() - static_cast(is.gcount()), std::char_traits::eof()); // the buffer is ready From 5d586b01928e329606d705f45d7012b1c0ec7527 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 28 Mar 2017 23:20:45 +0200 Subject: [PATCH 15/44] :hammer: cleanup --- src/json.hpp | 84 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 60 insertions(+), 24 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index cb52a6ef..e9b621f6 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -10296,10 +10296,11 @@ class basic_json JSON_THROW(parse_error::create(111, 0, "bad input stream")); } - // initial fill; unfilled buffer charaters remain EOF + // initial fill; unfilled buffer characters remain EOF is.read(buffer.data(), static_cast(buffer.size())); - // ignore byte-order mark + // skip byte-order mark + assert(buffer.size() >= 3); if (buffer[0] == '\xEF' and buffer[1] == '\xBB' and buffer[2] == '\xBF') { buffer_pos += 3; @@ -10317,7 +10318,7 @@ class basic_json int get_character() override { - // check if refilling is neccessary + // check if refilling is necessary if (JSON_UNLIKELY(buffer_pos == buffer.size())) { // refill @@ -10360,17 +10361,18 @@ class basic_json } private: + /// the associated input stream std::istream& is; - // chars returned via get_character() + /// chars returned via get_character() 
size_t processed_chars = 0; - // chars processed in the current buffer + /// chars processed in the current buffer size_t buffer_pos = 0; - // position of the stream when we started + /// position of the stream when we started const std::streampos start_position; - // internal buffer + /// internal buffer std::vector buffer; }; @@ -10419,6 +10421,7 @@ class basic_json } private: + /// the associated input stream std::istream& is; }; @@ -10430,6 +10433,7 @@ class basic_json : input_adapter(), cursor(b), limit(b + l), start(b) {} + // delete because of pointer members input_buffer_adapter(const input_buffer_adapter&) = delete; input_buffer_adapter& operator=(input_buffer_adapter&) = delete; @@ -10453,8 +10457,11 @@ class basic_json } private: + /// pointer to the current character const char* cursor; + /// pointer past the last character const char* limit; + /// pointer to the first character const char* start; }; @@ -10597,7 +10604,7 @@ class basic_json reset(); // we entered the function by reading an open quote - assert (current == '\"'); + assert(current == '\"'); static unsigned char next[256] = {17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 2, 2, 6, 3, 3, 3, 7, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18}; @@ 
-11022,14 +11029,12 @@ class basic_json default: { - assert(false); + assert(false); // LCOV_EXCL_LINE } } } } - // overloaded wrappers for strtod/strtof/strtold - // that will be called from parse static void strtof(float& f, const char* str, char** endptr) noexcept { f = std::strtof(str, endptr); @@ -11125,7 +11130,7 @@ class basic_json char* endptr = nullptr; const auto x = std::strtoll(yytext.data(), &endptr, 10); value_integer = static_cast(x); - if (JSON_LIKELY(errno == 0 and endptr == yytext.data() + yylen and value_integer == x)) + if (errno == 0 and endptr == yytext.data() + yylen and value_integer == x) { return token_type::value_integer; } @@ -11135,7 +11140,7 @@ class basic_json char* endptr = nullptr; const auto x = std::strtoull(yytext.data(), &endptr, 10); value_unsigned = static_cast(x); - if (JSON_LIKELY(errno == 0 and endptr == yytext.data() + yylen and value_unsigned == x)) + if (errno == 0 and endptr == yytext.data() + yylen and value_unsigned == x) { return token_type::value_unsigned; } @@ -11148,6 +11153,7 @@ class basic_json token_type scan_true() { + assert(current == 't'); if (JSON_LIKELY((get() == 'r' and get() == 'u' and get() == 'e'))) { return token_type::literal_true; @@ -11159,6 +11165,7 @@ class basic_json token_type scan_false() { + assert(current == 'f'); if (JSON_LIKELY((get() == 'a' and get() == 'l' and get() == 's' and get() == 'e'))) { return token_type::literal_false; @@ -11170,6 +11177,7 @@ class basic_json token_type scan_null() { + assert(current == 'n'); if (JSON_LIKELY((get() == 'u' and get() == 'l' and get() == 'l'))) { return token_type::literal_null; @@ -11183,13 +11191,14 @@ class basic_json // input management ///////////////////// - void reset() + /// reset yytext + void reset() noexcept { yylen = 0; start_pos = chars_read - 1; } - // get a character from the input + /// get a character from the input int get() { ++chars_read; @@ -11206,14 +11215,14 @@ class basic_json return current; } - // unget a character to the 
input - void unget() + /// unget a character to the input + void unget() noexcept { --chars_read; next_unget = true; } - // add a character to yytext + /// add a character to yytext void add(int c) { // resize yytext if necessary @@ -11225,48 +11234,70 @@ class basic_json } public: - constexpr size_t get_position() const noexcept - { - return chars_read; - } + ///////////////////// + // value getters + ///////////////////// + /// return integer value constexpr number_integer_t get_number_integer() const noexcept { return value_integer; } + /// return unsigned integer value constexpr number_unsigned_t get_number_unsigned() const noexcept { return value_unsigned; } + /// return floating-point value constexpr number_float_t get_number_float() const noexcept { return value_float; } + /// return string value const std::string get_string() { + // yytext cannot be returned as char*, because it may contain a + // null byte return std::string(yytext.data(), yylen); } + ///////////////////// + // diagnostics + ///////////////////// + + /// return position of last read token + constexpr size_t get_position() const noexcept + { + return chars_read; + } + + /// return the last read token (for errors only) std::string get_token_string() const { + // get the raw byte sequence of the last token std::string s = ia->read(start_pos, chars_read - start_pos); - std::stringstream ss; + // escape control characters + std::stringstream ss; for (auto c : s) { if (c == '\0' or c == std::char_traits::eof()) { + // ignore EOF continue; } else if ('\x00' <= c and c <= '\x1f') { + // escape control characters ss << ""; } else { + + // add character as is ss << c; } } @@ -11274,11 +11305,16 @@ class basic_json return ss.str(); } + /// return syntax error message const std::string& get_error_message() const noexcept { return error_message; } + ///////////////////// + // actual scanner + ///////////////////// + token_type scan() { // read next character and ignore whitespace @@ -11370,7 +11406,7 
@@ class basic_json number_unsigned_t value_unsigned = 0; number_float_t value_float = 0; - // the decimal point + /// the decimal point const char decimal_point_char = '\0'; }; From 60057a4728ac4a7a9826d14d3c15aba5213289e9 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 29 Mar 2017 07:54:26 +0200 Subject: [PATCH 16/44] :hammer: fixed bugs introduced by merging from develop --- src/json.hpp | 4 ++-- test/src/unit-regression.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 903f3178..d2dbb7fe 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -7390,7 +7390,7 @@ class basic_json JSON_DEPRECATED friend std::istream& operator<<(basic_json& j, std::istream& i) { - j = parser(i).parse(); + j = parser(i).parse(true); return i; } @@ -7422,7 +7422,7 @@ class basic_json */ friend std::istream& operator>>(std::istream& i, basic_json& j) { - j = parser(i).parse(false); + j = parser(i).parse(true); return i; } diff --git a/test/src/unit-regression.cpp b/test/src/unit-regression.cpp index e8eae0f8..c221b1ab 100644 --- a/test/src/unit-regression.cpp +++ b/test/src/unit-regression.cpp @@ -596,7 +596,7 @@ TEST_CASE("regression tests") // a parse error because of the EOF. 
CHECK_THROWS_AS(ss >> j, json::parse_error); CHECK_THROWS_WITH(ss >> j, - "[json.exception.parse_error.101] parse error at 1: parse error - unexpected end of input"); + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input"); } SECTION("issue #389 - Integer-overflow (OSS-Fuzz issue 267)") From 4c821c9e9c842f957eb8100b642330639829b913 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Fri, 31 Mar 2017 17:11:51 +0200 Subject: [PATCH 17/44] :hammer: moved buffer size to interface --- src/json.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index d2dbb7fe..f3e20f06 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -10307,9 +10307,9 @@ class basic_json class cached_input_stream_adapter : public input_adapter { public: - cached_input_stream_adapter(std::istream& i) + cached_input_stream_adapter(std::istream& i, const size_t buffer_size) : is(i), start_position(is.tellg()), - buffer(1024 * 1024, std::char_traits::eof()) + buffer(buffer_size, std::char_traits::eof()) { // immediately abort if stream is erroneous if (JSON_UNLIKELY(i.fail())) @@ -10552,7 +10552,7 @@ class basic_json } explicit lexer(std::istream& i) - : ia(new cached_input_stream_adapter(i)), + : ia(new cached_input_stream_adapter(i, 1024 * 1024)), decimal_point_char(get_decimal_point()) {} From 50ee0a62f5eaf40d62c67115ebf58f959ab4dc60 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Fri, 31 Mar 2017 23:24:33 +0200 Subject: [PATCH 18/44] :hammer: replaced lookup-tables by switches --- src/json.hpp | 1314 ++++++++++++++++++++++---------- test/src/unit-class_parser.cpp | 2 +- 2 files changed, 921 insertions(+), 395 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index f3e20f06..51154ca1 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -10552,7 +10552,7 @@ class basic_json } explicit lexer(std::istream& i) - : ia(new cached_input_stream_adapter(i, 1024 * 1024)), + : ia(new cached_input_stream_adapter(i, 16384)), 
decimal_point_char(get_decimal_point()) {} @@ -10591,29 +10591,243 @@ class basic_json // must be called after \u was read; returns following xxxx as hex or -1 when error int get_codepoint() { - // a mapping to discover hex numbers - static int8_t ascii_to_hex[256] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; - + assert(current == 'u'); int codepoint = 0; - // check the next 4 bytes - for (size_t i = 0; i < 4; ++i) + switch (get()) { - const int8_t digit = ascii_to_hex[static_cast(get())]; - if (JSON_UNLIKELY(digit == -1)) - { + case '0': + break; + case '1': + codepoint += 0x1000; + break; + case '2': + codepoint += 0x2000; + break; + case '3': + codepoint += 0x3000; + break; + case '4': + codepoint += 0x4000; + break; + case '5': + codepoint += 0x5000; + break; + case '6': + codepoint += 0x6000; + break; + case '7': + codepoint += 0x7000; + break; + case '8': + codepoint += 0x8000; + break; + case '9': + codepoint += 0x9000; + break; + case 'A': + case 'a': + codepoint += 0xa000; + break; + case 'B': + case 
'b': + codepoint += 0xb000; + break; + case 'C': + case 'c': + codepoint += 0xc000; + break; + case 'D': + case 'd': + codepoint += 0xd000; + break; + case 'E': + case 'e': + codepoint += 0xe000; + break; + case 'F': + case 'f': + codepoint += 0xf000; + break; + default: return -1; - } - else - { - codepoint += digit; - } + } - // except the last byte, result must be multiplied by 16 - if (i != 3) - { - codepoint <<= 4; - } + switch (get()) + { + case '0': + break; + case '1': + codepoint += 0x0100; + break; + case '2': + codepoint += 0x0200; + break; + case '3': + codepoint += 0x0300; + break; + case '4': + codepoint += 0x0400; + break; + case '5': + codepoint += 0x0500; + break; + case '6': + codepoint += 0x0600; + break; + case '7': + codepoint += 0x0700; + break; + case '8': + codepoint += 0x0800; + break; + case '9': + codepoint += 0x0900; + break; + case 'A': + case 'a': + codepoint += 0x0a00; + break; + case 'B': + case 'b': + codepoint += 0x0b00; + break; + case 'C': + case 'c': + codepoint += 0x0c00; + break; + case 'D': + case 'd': + codepoint += 0x0d00; + break; + case 'E': + case 'e': + codepoint += 0x0e00; + break; + case 'F': + case 'f': + codepoint += 0x0f00; + break; + default: + return -1; + } + + switch (get()) + { + case '0': + break; + case '1': + codepoint += 0x0010; + break; + case '2': + codepoint += 0x0020; + break; + case '3': + codepoint += 0x0030; + break; + case '4': + codepoint += 0x0040; + break; + case '5': + codepoint += 0x0050; + break; + case '6': + codepoint += 0x0060; + break; + case '7': + codepoint += 0x0070; + break; + case '8': + codepoint += 0x0080; + break; + case '9': + codepoint += 0x0090; + break; + case 'A': + case 'a': + codepoint += 0x00a0; + break; + case 'B': + case 'b': + codepoint += 0x00b0; + break; + case 'C': + case 'c': + codepoint += 0x00c0; + break; + case 'D': + case 'd': + codepoint += 0x00d0; + break; + case 'E': + case 'e': + codepoint += 0x00e0; + break; + case 'F': + case 'f': + codepoint += 0x00f0; + 
break; + default: + return -1; + } + + switch (get()) + { + case '0': + break; + case '1': + codepoint += 0x0001; + break; + case '2': + codepoint += 0x0002; + break; + case '3': + codepoint += 0x0003; + break; + case '4': + codepoint += 0x0004; + break; + case '5': + codepoint += 0x0005; + break; + case '6': + codepoint += 0x0006; + break; + case '7': + codepoint += 0x0007; + break; + case '8': + codepoint += 0x0008; + break; + case '9': + codepoint += 0x0009; + break; + case 'A': + case 'a': + codepoint += 0x000a; + break; + case 'B': + case 'b': + codepoint += 0x000b; + break; + case 'C': + case 'c': + codepoint += 0x000c; + break; + case 'D': + case 'd': + codepoint += 0x000d; + break; + case 'E': + case 'e': + codepoint += 0x000e; + break; + case 'F': + case 'f': + codepoint += 0x000f; + break; + default: + return -1; } return codepoint; @@ -10627,260 +10841,31 @@ class basic_json // we entered the function by reading an open quote assert(current == '\"'); - static unsigned char next[256] = {17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 2, 2, 6, 3, 3, 3, 7, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18}; - - // state variable - int state = -1; - - // whether the state is already set - bool state_set = false; - 
while (true) { // get next character get(); - // end of file while parsing string - if (JSON_UNLIKELY(current == std::char_traits::eof())) + switch (current) { - error_message = "invalid string: missing closing quote"; - return token_type::parse_error; - } - - // after coping with EOF, we only cope with bytes - //assert(0 <= current and current <= 255); - unsigned char ch = static_cast(current); - - // get next state - state = state_set ? state : next[ch]; - // reset variable - state_set = false; - - // 'add': 0, - // 'add_check1': 1, - // 'add_check2': 2, - // 'add_check3': 3, - // 'add_check_e0': 4, - // 'add_check_ed': 5, - // 'add_check_f0': 6, - // 'add_check_f4': 7, - // 'check1': 8, - // 'check2': 9, - // 'check3': 10, - // 'check_e0': 11, - // 'check_ed': 12, - // 'check_f0': 13, - // 'check_f4': 14, - // 'escape': 15, - // 'end': 16, - // 'error_invalid': 17, - // 'error_utf8': 18 - assert(0 <= state and state <= 18); - - switch (state) - { - // add - case 0: + // end of file while parsing string + case std::char_traits::eof(): { - add(current); - break; + error_message = "invalid string: missing closing quote"; + return token_type::parse_error; } - // add_check1 - case 1: + // closing quote + case '\"': { - add(current); - // next state is check1 - state = 8; - state_set = true; - break; + // terminate yytext + add('\0'); + --yylen; + return token_type::value_string; } - // add_check2 - case 2: - { - add(current); - // next state is check2 - state = 9; - state_set = true; - break; - } - - // add_check3 - case 3: - { - add(current); - // next state is check3 - state = 10; - state_set = true; - break; - } - - // add_check_e0 - case 4: - { - add(current); - // next state is check_e0 - state = 11; - state_set = true; - break; - } - - // add_check_ed - case 5: - { - add(current); - // next state is check_ed - state = 12; - state_set = true; - break; - } - - // add_check_f0 - case 6: - { - add(current); - // next state is check_f0 - state = 13; - state_set = 
true; - break; - } - - // add_check_f4 - case 7: - { - add(current); - // next state is check_f4 - state = 14; - state_set = true; - break; - } - - // check1 - case 8: - { - if (JSON_LIKELY(0x80 <= ch and ch <= 0xBF)) - { - add(current); - break; - } - else - { - error_message = "invalid string: not well-formed UTF-8 byte"; - return token_type::parse_error; - } - } - - // check2 - case 9: - { - if (JSON_LIKELY(0x80 <= ch and ch <= 0xBF)) - { - add(current); - // next state is check1 - state = 8; - state_set = true; - break; - } - else - { - error_message = "invalid string: not well-formed UTF-8 byte"; - return token_type::parse_error; - } - } - - // check3 - case 10: - { - if (JSON_LIKELY(0x80 <= ch and ch <= 0xBF)) - { - add(current); - // next state is check2 - state = 9; - state_set = true; - break; - } - else - { - error_message = "invalid string: not well-formed UTF-8 byte"; - return token_type::parse_error; - } - } - - // check_e0 - case 11: - { - if (JSON_LIKELY(0xA0 <= ch and ch <= 0xBF)) - { - add(current); - // next state is check1 - state = 8; - state_set = true; - break; - } - else - { - error_message = "invalid string: not well-formed UTF-8 byte"; - return token_type::parse_error; - } - } - - // check_ed - case 12: - { - if (JSON_LIKELY(0x80 <= ch and ch <= 0x9F)) - { - add(current); - // next state is check1 - state = 8; - state_set = true; - break; - } - else - { - error_message = "invalid string: not well-formed UTF-8 byte"; - return token_type::parse_error; - } - } - - // check_f0 - case 13: - { - if (JSON_LIKELY(0x90 <= ch and ch <= 0xBF)) - { - add(current); - // next state is check2 - state = 9; - state_set = true; - break; - } - else - { - error_message = "invalid string: not well-formed UTF-8 byte"; - return token_type::parse_error; - } - } - - // check_f4 - case 14: - { - if (JSON_LIKELY(0x80 <= ch and ch <= 0x8F)) - { - add(current); - // next state is check2 - state = 9; - state_set = true; - break; - } - else - { - error_message = "invalid 
string: not well-formed UTF-8 byte"; - return token_type::parse_error; - } - } - - // escape - case 15: + // escapes + case '\\': { switch (get()) { @@ -10935,7 +10920,7 @@ class basic_json // expect next \uxxxx entry if (JSON_LIKELY(get() == '\\' and get() == 'u')) { - int codepoint2 = get_codepoint(); + const int codepoint2 = get_codepoint(); if (JSON_UNLIKELY(codepoint2 == -1)) { @@ -11025,32 +11010,344 @@ class basic_json break; } - // end - case 16: - { - // terminate yytext - add('\0'); - --yylen; - return token_type::value_string; - } - - // error_invalid - case 17: + // invalid control characters + case '\x00': + case '\x01': + case '\x02': + case '\x03': + case '\x04': + case '\x05': + case '\x06': + case '\x07': + case '\x08': + case '\x09': + case '\x0a': + case '\x0b': + case '\x0c': + case '\x0d': + case '\x0e': + case '\x0f': + case '\x10': + case '\x11': + case '\x12': + case '\x13': + case '\x14': + case '\x15': + case '\x16': + case '\x17': + case '\x18': + case '\x19': + case '\x1a': + case '\x1b': + case '\x1c': + case '\x1d': + case '\x1e': + case '\x1f': { error_message = "invalid string: control characters (U+0000 through U+001f) must be escaped"; return token_type::parse_error; } - // error_utf8 - case 18: + // U+0020..U+007F (except U+0022 (quote) and U+005C (backspace)) + case '\x20': + case '\x21': + case '\x23': + case '\x24': + case '\x25': + case '\x26': + case '\x27': + case '\x28': + case '\x29': + case '\x2a': + case '\x2b': + case '\x2c': + case '\x2d': + case '\x2e': + case '\x2f': + case '\x30': + case '\x31': + case '\x32': + case '\x33': + case '\x34': + case '\x35': + case '\x36': + case '\x37': + case '\x38': + case '\x39': + case '\x3a': + case '\x3b': + case '\x3c': + case '\x3d': + case '\x3e': + case '\x3f': + case '\x40': + case '\x41': + case '\x42': + case '\x43': + case '\x44': + case '\x45': + case '\x46': + case '\x47': + case '\x48': + case '\x49': + case '\x4a': + case '\x4b': + case '\x4c': + case '\x4d': + case 
'\x4e': + case '\x4f': + case '\x50': + case '\x51': + case '\x52': + case '\x53': + case '\x54': + case '\x55': + case '\x56': + case '\x57': + case '\x58': + case '\x59': + case '\x5a': + case '\x5b': + case '\x5d': + case '\x5e': + case '\x5f': + case '\x60': + case '\x61': + case '\x62': + case '\x63': + case '\x64': + case '\x65': + case '\x66': + case '\x67': + case '\x68': + case '\x69': + case '\x6a': + case '\x6b': + case '\x6c': + case '\x6d': + case '\x6e': + case '\x6f': + case '\x70': + case '\x71': + case '\x72': + case '\x73': + case '\x74': + case '\x75': + case '\x76': + case '\x77': + case '\x78': + case '\x79': + case '\x7a': + case '\x7b': + case '\x7c': + case '\x7d': + case '\x7e': + case '\x7f': { + add(current); + break; + } + + // U+0080..U+07FF: bytes C2..DF 80..BF + case '\xc2': + case '\xc3': + case '\xc4': + case '\xc5': + case '\xc6': + case '\xc7': + case '\xc8': + case '\xc9': + case '\xca': + case '\xcb': + case '\xcc': + case '\xcd': + case '\xce': + case '\xcf': + case '\xd0': + case '\xd1': + case '\xd2': + case '\xd3': + case '\xd4': + case '\xd5': + case '\xd6': + case '\xd7': + case '\xd8': + case '\xd9': + case '\xda': + case '\xdb': + case '\xdc': + case '\xdd': + case '\xde': + case '\xdf': + { + add(current); + get(); + if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + { + add(current); + continue; + } + error_message = "invalid string: not well-formed UTF-8 byte"; return token_type::parse_error; } + // U+0800..U+0FFF: bytes E0 A0..BF 80..BF + case '\xe0': + { + add(current); + get(); + if (JSON_LIKELY('\xa0' <= current and current <= '\xbf')) + { + add(current); + get(); + if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + { + add(current); + continue; + } + } + + error_message = "invalid string: not well-formed UTF-8 byte"; + return token_type::parse_error; + } + + // U+1000..U+CFFF: bytes E1..EC 80..BF 80..BF + // U+E000..U+FFFF: bytes EE..EF 80..BF 80..BF + case '\xe1': + case '\xe2': + case '\xe3': 
+ case '\xe4': + case '\xe5': + case '\xe6': + case '\xe7': + case '\xe8': + case '\xe9': + case '\xea': + case '\xeb': + case '\xec': + case '\xee': + case '\xef': + { + add(current); + get(); + if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + { + add(current); + get(); + if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + { + add(current); + continue; + } + } + + error_message = "invalid string: not well-formed UTF-8 byte"; + return token_type::parse_error; + } + + // U+D000..U+D7FF: bytes ED 80..9F 80..BF + case '\xed': + { + add(current); + get(); + if (JSON_LIKELY('\x80' <= current and current <= '\x9f')) + { + add(current); + get(); + if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + { + add(current); + continue; + } + } + + error_message = "invalid string: not well-formed UTF-8 byte"; + return token_type::parse_error; + } + + // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF + case '\xf0': + { + add(current); + get(); + if (JSON_LIKELY('\x90' <= current and current <= '\xbf')) + { + add(current); + get(); + if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + { + add(current); + get(); + if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + { + add(current); + continue; + } + } + } + + error_message = "invalid string: not well-formed UTF-8 byte"; + return token_type::parse_error; + } + + // U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF + case '\xf1': + case '\xf2': + case '\xf3': + { + add(current); + get(); + if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + { + add(current); + get(); + if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + { + add(current); + get(); + if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + { + add(current); + continue; + } + } + } + + error_message = "invalid string: not well-formed UTF-8 byte"; + return token_type::parse_error; + } + + // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF + case '\xf4': + { + add(current); + get(); + if (JSON_LIKELY('\x80' <= current and 
current <= '\x8f')) + { + add(current); + get(); + if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + { + add(current); + get(); + if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + { + add(current); + continue; + } + } + } + + error_message = "invalid string: not well-formed UTF-8 byte"; + return token_type::parse_error; + } + + // remaining bytes (80..C1 and F5..FF) are not well-formed default: { - assert(false); // LCOV_EXCL_LINE + error_message = "invalid string: not well-formed UTF-8 byte"; + return token_type::parse_error; } } } @@ -11071,70 +11368,301 @@ class basic_json f = std::strtold(str, endptr); } + /*! + state | 0 | 1-9 | e E | + | - | . | anything + ---------|----------|----------|----------|---------|---------|----------|----------- + init | zero | any1 | [error] | [error] | minus | [error] | [error] + minus | zero | any1 | [error] | [error] | [error] | [error] | [error] + zero | done | done | exponent | done | done | decimal1 | done + any1 | any1 | any1 | exponent | done | done | decimal1 | done + decimal1 | decimal2 | [error] | [error] | [error] | [error] | [error] | [error] + decimal2 | decimal2 | decimal2 | exponent | done | done | done | done + exponent | any2 | any2 | [error] | sign | sign | [error] | [error] + sign | any2 | any2 | [error] | [error] | [error] | [error] | [error] + any2 | any2 | any2 | done | done | done | done | done + */ token_type scan_number() { - static unsigned char lookup[9][256] = - { - {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 1, 10, 10, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10}, - {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10}, - {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}, - {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}, - {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10}, - {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 8, 10, 8, 10, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10}, - {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}, - {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 
9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}, - {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10} - }; - reset(); - bool has_sign = false; - bool has_exp = false; - bool has_point = false; + // the type of the parsed number; initially set to unsigned; will + // be changed if minus sign, decimal point or exponent is read + token_type number_type = token_type::value_unsigned; - int state = lookup[0][static_cast(current)]; - int old_state = 0; - 
- while (state != 9) + // state: we just found out we need to scan a number + switch (current) { - has_sign = has_sign or (state == 1); - has_point = has_point or (state == 4); - has_exp = has_exp or (state == 5); - - if (JSON_UNLIKELY(state == 10)) + case '-': { - // create error message based on previous state - switch (old_state) - { - case 0: - error_message = "invalid number; expected '-' or digit"; - break; - case 1: - error_message = "invalid number; expected digit after '-'"; - break; - case 4: - error_message = "invalid number; expected digit after '.'"; - break; - case 5: - error_message = "invalid number; expected '+', '-', or digit after exponent"; - break; - case 8: - error_message = "invalid number; expected digit after exponent sign"; - break; - default: - assert(false); // no error in the other states - break; - } - return token_type::parse_error; + add(current); + goto scan_number_minus; } - // add current character and fix decimal point - add((state == 4) ? decimal_point_char : current); - get(); - old_state = state; - state = lookup[state][static_cast(current)]; + case '0': + { + add(current); + goto scan_number_zero; + } + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } + + default: + { + // all other characters are rejected outside scan_number() + assert(false); // LCOV_EXCL_LINE + } } +scan_number_minus: + // state: we just parsed a leading minus sign + number_type = token_type::value_integer; + switch (get()) + { + case '0': + { + add(current); + goto scan_number_zero; + } + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } + + default: + { + error_message = "invalid number; expected digit after '-'"; + return token_type::parse_error; + } + } + +scan_number_zero: + // state: we just parse a zero (maybe with a leading minus sign) + 
switch (get()) + { + case '.': + { + add(decimal_point_char); + goto scan_number_decimal1; + } + + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + + default: + { + goto scan_number_done; + } + } + +scan_number_any1: + // state: we just parsed a number 0-9 (maybe with a leading minus sign) + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } + + case '.': + { + add(decimal_point_char); + goto scan_number_decimal1; + } + + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + + default: + { + goto scan_number_done; + } + } + +scan_number_decimal1: + // state: we just parsed a decimal point + number_type = token_type::value_float; + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_decimal2; + } + + default: + { + error_message = "invalid number; expected digit after '.'"; + return token_type::parse_error; + } + } + +scan_number_decimal2: + // we just parsed at least one number after a decimal point + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_decimal2; + } + + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + + default: + { + goto scan_number_done; + } + } + +scan_number_exponent: + // we just parsed an exponent + number_type = token_type::value_float; + switch (get()) + { + case '+': + case '-': + { + add(current); + goto scan_number_sign; + } + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } + + default: + { + error_message = "invalid number; expected '+', '-', or digit after 
exponent"; + return token_type::parse_error; + } + } + +scan_number_sign: + // we just parsed an exponent sign + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } + + default: + { + error_message = "invalid number; expected digit after exponent sign"; + return token_type::parse_error; + } + } + +scan_number_any2: + // we just parsed a number after the exponent or exponent sign + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } + + default: + { + goto scan_number_done; + } + } + +scan_number_done: // unget the character after the number unget(); @@ -11143,30 +11671,42 @@ class basic_json --yylen; // try to parse integers first and fall back to floats - if (not has_exp and not has_point) + if (number_type == token_type::value_unsigned) { + char* endptr = nullptr; errno = 0; - if (has_sign) + const auto x = std::strtoull(yytext.data(), &endptr, 10); + + // we checked the number format before + assert(endptr == yytext.data() + yylen); + + if (errno == 0) { - char* endptr = nullptr; - const auto x = std::strtoll(yytext.data(), &endptr, 10); - value_integer = static_cast(x); - if (errno == 0 and endptr == yytext.data() + yylen and value_integer == x) - { - return token_type::value_integer; - } - } - else - { - char* endptr = nullptr; - const auto x = std::strtoull(yytext.data(), &endptr, 10); value_unsigned = static_cast(x); - if (errno == 0 and endptr == yytext.data() + yylen and value_unsigned == x) + if (value_unsigned == x) { return token_type::value_unsigned; } } } + else if (number_type == token_type::value_integer) + { + char* endptr = nullptr; + errno = 0; + const auto x = std::strtoll(yytext.data(), &endptr, 10); + + // we checked the number format before + assert(endptr == yytext.data() + 
yylen); + + if (errno == 0) + { + value_integer = static_cast(x); + if (value_integer == x) + { + return token_type::value_integer; + } + } + } strtof(value_float, yytext.data(), nullptr); return token_type::value_float; @@ -11223,17 +11763,9 @@ class basic_json int get() { ++chars_read; - - if (JSON_UNLIKELY(next_unget)) - { - next_unget = false; - } - else - { - current = ia->get_character(); - } - - return current; + return next_unget + ? (next_unget = false, current) + : (current = ia->get_character()); } /// unget a character to the input @@ -11317,7 +11849,6 @@ class basic_json } else { - // add character as is ss << c; } @@ -11441,8 +11972,7 @@ class basic_json public: /// a parser reading from a string literal parser(const char* buff, const parser_callback_t cb = nullptr) - : callback(cb), - m_lexer(buff, std::strlen(buff)) + : callback(cb), m_lexer(buff, std::strlen(buff)) {} /*! @@ -11528,22 +12058,11 @@ class basic_json return result; } - // no comma is expected here - unexpect(lexer::token_type::value_separator); - - // otherwise: parse key-value pairs - do + // parse values + while (true) { - // ugly, but could be fixed with loop reorganization - if (last_token == lexer::token_type::value_separator) - { - get_token(); - } - // store key expect(lexer::token_type::value_string); - // FIXME get_string returns const char*; maybe we can - // avoid this copy in the future const auto key = m_lexer.get_string(); bool keep_tag = false; @@ -11571,12 +12090,20 @@ class basic_json { result[key] = std::move(value); } - } - while (last_token == lexer::token_type::value_separator); - // closing } - expect(lexer::token_type::end_object); - get_token(); + // comma -> next value + if (last_token == lexer::token_type::value_separator) + { + get_token(); + continue; + } + + // closing } + expect(lexer::token_type::end_object); + get_token(); + break; + } + if (keep and callback and not callback(--depth, parse_event_t::object_end, result)) { result = 
basic_json(value_t::discarded); @@ -11609,30 +12136,29 @@ class basic_json return result; } - // no comma is expected here - unexpect(lexer::token_type::value_separator); - - // otherwise: parse values - do + // parse values + while (true) { - // ugly, but could be fixed with loop reorganization - if (last_token == lexer::token_type::value_separator) - { - get_token(); - } - // parse value auto value = parse_internal(keep); if (keep and not value.is_discarded()) { result.push_back(std::move(value)); } - } - while (last_token == lexer::token_type::value_separator); - // closing ] - expect(lexer::token_type::end_array); - get_token(); + // comma -> next value + if (last_token == lexer::token_type::value_separator) + { + get_token(); + continue; + } + + // closing ] + expect(lexer::token_type::end_array); + get_token(); + break; + } + if (keep and callback and not callback(--depth, parse_event_t::array_end, result)) { result = basic_json(value_t::discarded); @@ -11728,7 +12254,7 @@ class basic_json */ void expect(typename lexer::token_type t) const { - if (t != last_token) + if (JSON_UNLIKELY(t != last_token)) { std::string error_msg = "syntax error - "; if (last_token == lexer::token_type::parse_error) @@ -11750,7 +12276,7 @@ class basic_json */ void unexpect(typename lexer::token_type t) const { - if (t == last_token) + if (JSON_UNLIKELY(t == last_token)) { std::string error_msg = "syntax error - "; if (last_token == lexer::token_type::parse_error) diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index f36eb900..864b7be1 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -659,7 +659,7 @@ TEST_CASE("parser class") // test case to make sure no comma preceeds the first key CHECK_THROWS_AS(json::parser("{,\"key\": false}").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("{,\"key\": false}").parse(), - "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected ','"); + 
"[json.exception.parse_error.101] parse error at 2: syntax error - unexpected ','; expected string literal"); // test case to make sure an object is properly closed CHECK_THROWS_AS(json::parser("[{\"key\": false true]").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("[{\"key\": false true]").parse(), From 3fa0610856c9eca53665c3cf2d0bc8b06b6bbb64 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Fri, 31 Mar 2017 23:57:24 +0200 Subject: [PATCH 19/44] :hammer: added minibench to ignore list --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 69a81cbd..58ced04e 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,6 @@ test/parse_afl_fuzzer test/parse_cbor_fuzzer test/parse_msgpack_fuzzer + +minibench + From c32d2e5b3c735c16eda5673c6cce5bf56effbf20 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sat, 1 Apr 2017 00:36:05 +0200 Subject: [PATCH 20/44] :hammer: removed unget() function --- src/json.hpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index ae3dab6f..4198b140 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -11664,7 +11664,8 @@ scan_number_any2: scan_number_done: // unget the character after the number - unget(); + --chars_read; + next_unget = true; // terminate token add('\0'); @@ -11768,13 +11769,6 @@ scan_number_done: : (current = ia->get_character()); } - /// unget a character to the input - void unget() noexcept - { - --chars_read; - next_unget = true; - } - /// add a character to yytext void add(int c) { From 3a5cf9bd0a58aed6f26ed0802f938d0ed348f75e Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sat, 1 Apr 2017 08:34:58 +0200 Subject: [PATCH 21/44] :hammer: improved code coverage --- src/json.hpp | 10 ++++------ test/src/unit-class_lexer.cpp | 9 +++++++++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 4198b140..1c00e12a 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -10965,6 
+10965,9 @@ class basic_json codepoint = codepoint1; } + // result of the above calculation yields a proper codepoint + assert(0x00 <= codepoint and codepoint <= 0x10FFFF); + // translate code point to bytes if (codepoint < 0x80) { @@ -10984,7 +10987,7 @@ class basic_json add(0x80 | ((codepoint >> 6) & 0x3F)); add(0x80 | (codepoint & 0x3F)); } - else if (codepoint <= 0x10ffff) + else { // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx add(0xF0 | (codepoint >> 18)); @@ -10992,11 +10995,6 @@ class basic_json add(0x80 | ((codepoint >> 6) & 0x3F)); add(0x80 | (codepoint & 0x3F)); } - else - { - error_message = "invalid string: code points above U+10FFFF are invalid"; - return token_type::parse_error; - } break; } diff --git a/test/src/unit-class_lexer.cpp b/test/src/unit-class_lexer.cpp index 468e1f52..2acea176 100644 --- a/test/src/unit-class_lexer.cpp +++ b/test/src/unit-class_lexer.cpp @@ -158,6 +158,15 @@ TEST_CASE("lexer class") } } + SECTION("very large string") + { + // strings larger than 1024 bytes yield a resize of the lexer's yytext buffer + std::string s("\""); + s += std::string(2048, 'x'); + s += "\""; + CHECK((json::lexer(s.c_str(), 2050).scan() == json::lexer::token_type::value_string)); + } + /* NOTE: to_unicode function has been removed SECTION("to_unicode") { From 89efe627fe58abd6e1c17d0cd89fea787a45ee01 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 2 Apr 2017 18:46:21 +0200 Subject: [PATCH 22/44] :hammer: a lot of restructuring - removed uncached input stream adapter; it was too slow anyway - implemented a class binary_read which parses CBOR based on input adapters - in the CBOR parser, numbers are created via memcpy to avoid undefined behavior --- Makefile | 6 +- src/json.hpp | 1311 ++++++++++++++++++++++++---------- test/src/unit-cbor.cpp | 32 +- test/src/unit-regression.cpp | 22 +- 4 files changed, 948 insertions(+), 423 deletions(-) diff --git a/Makefile b/Makefile index 179df3aa..138a85b6 100644 --- a/Makefile +++ b/Makefile 
@@ -47,7 +47,8 @@ doctest: # -Wno-keyword-macro: unit-tests use "#define private public" # -Wno-deprecated-declarations: the library deprecated some functions # -Wno-weak-vtables: exception class is defined inline, but has virtual method -# -Wno-range-loop-analysis: iterator_wrapper tests tests "for(const auto i...)" +# -Wno-range-loop-analysis: iterator_wrapper tests "for(const auto i...)" +# -Wno-float-equal: not all comparisons in the tests can be replaced by Approx pedantic_clang: $(MAKE) json_unit CXXFLAGS="\ -std=c++11 \ @@ -58,7 +59,8 @@ pedantic_clang: -Wno-keyword-macro \ -Wno-deprecated-declarations \ -Wno-weak-vtables \ - -Wno-range-loop-analysis" + -Wno-range-loop-analysis \ + -Wno-float-equal" # calling GCC with most warnings pedantic_gcc: diff --git a/src/json.hpp b/src/json.hpp index 1c00e12a..e96a53e3 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -38,7 +38,7 @@ SOFTWARE. #include // nullptr_t, ptrdiff_t, size_t #include // int64_t, uint64_t #include // abort, strtod, strtof, strtold, strtoul, strtoll, strtoull -#include // strlen +#include // memcpy, strlen #include // forward_list #include // function, hash, less #include // initializer_list @@ -9228,12 +9228,12 @@ class basic_json @since version 2.0.9, parameter @a start_index since 2.1.1 */ - static basic_json from_cbor(const std::vector& v, - const size_t start_index = 0) - { - size_t i = start_index; - return from_cbor_internal(v, i); - } + //static basic_json from_cbor(const std::vector& v, + // const size_t start_index = 0) + //{ + // size_t i = start_index; + // return from_cbor_internal(v, i); + //} /// @} @@ -10281,10 +10281,727 @@ class basic_json private: + //////////////////// + // input adapters // + //////////////////// + + /// abstract input adapter interface + class input_adapter + { + public: + virtual int get_character() = 0; + virtual std::string read(size_t offset, size_t length) = 0; + virtual ~input_adapter() {} + }; + + /// input adapter for cached stream input + class 
cached_input_stream_adapter : public input_adapter + { + public: + cached_input_stream_adapter(std::istream& i, const size_t buffer_size) + : is(i), start_position(is.tellg()), + buffer(buffer_size, std::char_traits::eof()) + { + // immediately abort if stream is erroneous + if (JSON_UNLIKELY(i.fail())) + { + JSON_THROW(parse_error::create(111, 0, "bad input stream")); + } + + // initial fill; unfilled buffer characters remain EOF + is.read(buffer.data(), static_cast(buffer.size())); + + // skip byte-order mark + assert(buffer.size() >= 3); + if (buffer[0] == '\xEF' and buffer[1] == '\xBB' and buffer[2] == '\xBF') + { + buffer_pos += 3; + processed_chars += 3; + } + } + + ~cached_input_stream_adapter() override + { + // clear stream flags + is.clear(); + // set stream after last processed char + is.seekg(start_position + static_cast(processed_chars - 1)); + } + + int get_character() override + { + // check if refilling is necessary + if (JSON_UNLIKELY(buffer_pos == buffer.size())) + { + // refill + is.read(reinterpret_cast(buffer.data()), static_cast(buffer.size())); + // set unfilled characters to EOF + std::fill_n(buffer.begin() + static_cast(is.gcount()), + buffer.size() - static_cast(is.gcount()), + std::char_traits::eof()); + // the buffer is ready + buffer_pos = 0; + } + + ++processed_chars; + const int res = buffer[buffer_pos++]; + return (res == std::char_traits::eof()) ? 
res : res & 0xFF; + } + + std::string read(size_t offset, size_t length) override + { + // create buffer + std::string result(length, '\0'); + + // save stream position + auto current_pos = is.tellg(); + // save stream flags + auto flags = is.rdstate(); + + // clear stream flags + is.clear(); + // set stream position + is.seekg(static_cast(offset)); + // read bytes + is.read(&result[0], static_cast(length)); + + // reset stream position + is.seekg(current_pos); + // reset stream flags + is.setstate(flags); + + return result; + } + + private: + /// the associated input stream + std::istream& is; + + /// chars returned via get_character() + size_t processed_chars = 0; + /// chars processed in the current buffer + size_t buffer_pos = 0; + + /// position of the stream when we started + const std::streampos start_position; + + /// internal buffer + std::vector buffer; + }; + + /// input adapter for buffer input + class input_buffer_adapter : public input_adapter + { + public: + input_buffer_adapter(const char* b, size_t l) + : input_adapter(), cursor(b), limit(b + l), start(b) + {} + + // delete because of pointer members + input_buffer_adapter(const input_buffer_adapter&) = delete; + input_buffer_adapter& operator=(input_buffer_adapter&) = delete; + + int get_character() override + { + if (JSON_LIKELY(cursor < limit)) + { + return *(cursor++) & 0xFF; + } + else + { + return std::char_traits::eof(); + } + } + + std::string read(size_t offset, size_t length) override + { + // avoid reading too many characters + const size_t max_length = static_cast(limit - start); + return std::string(start + offset, std::min({length, max_length})); + } + + private: + /// pointer to the current character + const char* cursor; + /// pointer past the last character + const char* limit; + /// pointer to the first character + const char* start; + }; + + //////////////////// + // binary formats // + //////////////////// + + private: + class binary_reader + { + public: + explicit 
binary_reader(std::istream& i) + : ia(new cached_input_stream_adapter(i, 16384)) + {} + + binary_reader(const char* buff, const size_t len) + : ia(new input_buffer_adapter(buff, len)) + {} + + ~binary_reader() + { + delete ia; + } + + // switch off unwanted functions (due to pointer members) + binary_reader(const binary_reader&) = delete; + binary_reader operator=(const binary_reader&) = delete; + + /*! + @param[in] get_char whether a new character should be retrieved from + the input (true, default) or whether the last + read character should be considered instead + */ + basic_json parse_cbor(const bool get_char = true) + { + switch (get_char ? get() : current) + { + // EOF + case std::char_traits::eof(): + { + JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + } + + // Integer 0x00..0x17 (0..23) + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case 0x09: + case 0x0a: + case 0x0b: + case 0x0c: + case 0x0d: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + { + return static_cast(current); + } + + case 0x18: // Unsigned integer (one-byte uint8_t follows) + { + return get_number(); + } + + case 0x19: // Unsigned integer (two-byte uint16_t follows) + { + return get_number(); + } + + case 0x1a: // Unsigned integer (four-byte uint32_t follows) + { + return get_number(); + } + + case 0x1b: // Unsigned integer (eight-byte uint64_t follows) + { + return get_number(); + } + + // Negative integer -1-0x00..-1-0x17 (-1..-24) + case 0x20: + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2a: + case 0x2b: + case 0x2c: + case 0x2d: + case 0x2e: + case 0x2f: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + { + return static_cast(0x20 - 1 - current); + } + + case 0x38: // 
Negative integer (one-byte uint8_t follows) + { + // must be uint8_t ! + return static_cast(-1) - get_number(); + } + + case 0x39: // Negative integer -1-n (two-byte uint16_t follows) + { + return static_cast(-1) - get_number(); + } + + case 0x3a: // Negative integer -1-n (four-byte uint32_t follows) + { + return static_cast(-1) - get_number(); + } + + case 0x3b: // Negative integer -1-n (eight-byte uint64_t follows) + { + return static_cast(-1) - static_cast(get_number()); + } + + // UTF-8 string (0x00..0x17 bytes follow) + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6a: + case 0x6b: + case 0x6c: + case 0x6d: + case 0x6e: + case 0x6f: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + case 0x78: // UTF-8 string (one-byte uint8_t for n follows) + case 0x79: // UTF-8 string (two-byte uint16_t for n follow) + case 0x7a: // UTF-8 string (four-byte uint32_t for n follow) + case 0x7b: // UTF-8 string (eight-byte uint64_t for n follow) + case 0x7f: // UTF-8 string (indefinite length) + { + return get_cbor_string(); + } + + // array (0x00..0x17 data items follow) + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8a: + case 0x8b: + case 0x8c: + case 0x8d: + case 0x8e: + case 0x8f: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + { + basic_json result = value_t::array; + const auto len = static_cast(current - 0x80); + for (size_t i = 0; i < len; ++i) + { + result.push_back(parse_cbor()); + } + return result; + } + + case 0x98: // array (one-byte uint8_t for n follows) + { + basic_json result = value_t::array; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + result.push_back(parse_cbor()); + } + return result; + } + + case 0x99: // array (two-byte 
uint16_t for n follow) + { + basic_json result = value_t::array; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + result.push_back(parse_cbor()); + } + return result; + } + + case 0x9a: // array (four-byte uint32_t for n follow) + { + basic_json result = value_t::array; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + result.push_back(parse_cbor()); + } + return result; + } + + case 0x9b: // array (eight-byte uint64_t for n follow) + { + basic_json result = value_t::array; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + result.push_back(parse_cbor()); + } + return result; + } + + case 0x9f: // array (indefinite length) + { + basic_json result = value_t::array; + while (get() != 0xff) + { + result.push_back(parse_cbor(false)); + } + return result; + } + + // map (0x00..0x17 pairs of data items follow) + case 0xa0: + case 0xa1: + case 0xa2: + case 0xa3: + case 0xa4: + case 0xa5: + case 0xa6: + case 0xa7: + case 0xa8: + case 0xa9: + case 0xaa: + case 0xab: + case 0xac: + case 0xad: + case 0xae: + case 0xaf: + case 0xb0: + case 0xb1: + case 0xb2: + case 0xb3: + case 0xb4: + case 0xb5: + case 0xb6: + case 0xb7: + { + basic_json result = value_t::object; + const auto len = static_cast(current - 0xa0); + for (size_t i = 0; i < len; ++i) + { + get(); + result[get_cbor_string()] = parse_cbor(); + } + return result; + } + + case 0xb8: // map (one-byte uint8_t for n follows) + { + basic_json result = value_t::object; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + get(); + result[get_cbor_string()] = parse_cbor(); + } + return result; + } + + case 0xb9: // map (two-byte uint16_t for n follow) + { + basic_json result = value_t::object; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + get(); + result[get_cbor_string()] = parse_cbor(); + } + return result; + } + + case 0xba: // map 
(four-byte uint32_t for n follow) + { + basic_json result = value_t::object; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + get(); + result[get_cbor_string()] = parse_cbor(); + } + return result; + } + + case 0xbb: // map (eight-byte uint64_t for n follow) + { + basic_json result = value_t::object; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + get(); + result[get_cbor_string()] = parse_cbor(); + } + return result; + } + + case 0xbf: // map (indefinite length) + { + basic_json result = value_t::object; + while (get() != 0xff) + { + result[get_cbor_string()] = parse_cbor(); + } + return result; + } + + case 0xf4: // false + { + return false; + } + + case 0xf5: // true + { + return true; + } + + case 0xf6: // null + { + return value_t::null; + } + + case 0xf9: // Half-Precision Float (two-byte IEEE 754) + { + const int byte1 = get(); + check_eof(); + const int byte2 = get(); + check_eof(); + + // code from RFC 7049, Appendix D, Figure 3: + // As half-precision floating-point numbers were only added to + // IEEE 754 in 2008, today's programming platforms often still + // only have limited support for them. It is very easy to + // include at least decoding support for them even without such + // support. An example of a small decoder for half-precision + // floating-point numbers in the C language is shown in Fig. 3. + const int half = (byte1 << 8) + byte2; + const int exp = (half >> 10) & 0x1f; + const int mant = half & 0x3ff; + double val; + if (exp == 0) + { + val = std::ldexp(mant, -24); + } + else if (exp != 31) + { + val = std::ldexp(mant + 1024, exp - 25); + } + else + { + val = mant == 0 + ? std::numeric_limits::infinity() + : std::numeric_limits::quiet_NaN(); + } + return (half & 0x8000) != 0 ? 
-val : val; + } + + case 0xfa: // Single-Precision Float (four-byte IEEE 754) + { + return get_number(); + } + + case 0xfb: // Double-Precision Float (eight-byte IEEE 754) + { + return get_number(); + } + + default: // anything else (0xFF is handled inside the other types) + { + std::stringstream ss; + ss << std::hex << current; + JSON_THROW(parse_error::create(112, chars_read, "error reading CBOR; last byte: 0x" + ss.str())); + } + } + } + + private: + int get() + { + ++chars_read; + return (current = ia->get_character()); + } + + // todo: check if this breaks with endianess + template + T get_number() + { + std::array vec; + for (size_t i = 0; i < sizeof(T); ++i) + { + get(); + check_eof(); + vec[sizeof(T) - i - 1] = static_cast(current); + } + + T result; + std::memcpy(&result, vec.data(), sizeof(T)); + return result; + } + + std::string get_string(const size_t len) + { + std::string result; + for (size_t i = 0; i < len; ++i) + { + get(); + check_eof(); + result.append(1, static_cast(current)); + } + return result; + } + + std::string get_cbor_string() + { + check_eof(); + + switch (current) + { + // UTF-8 string (0x00..0x17 bytes follow) + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6a: + case 0x6b: + case 0x6c: + case 0x6d: + case 0x6e: + case 0x6f: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + { + const auto len = static_cast(current - 0x60); + return get_string(len); + } + + case 0x78: // UTF-8 string (one-byte uint8_t for n follows) + { + const auto len = static_cast(get_number()); + return get_string(len); + } + + case 0x79: // UTF-8 string (two-byte uint16_t for n follow) + { + const auto len = static_cast(get_number()); + return get_string(len); + } + + case 0x7a: // UTF-8 string (four-byte uint32_t for n follow) + { + const auto len = static_cast(get_number()); + return get_string(len); + } + + case 
0x7b: // UTF-8 string (eight-byte uint64_t for n follow) + { + const auto len = static_cast(get_number()); + return get_string(len); + } + + case 0x7f: // UTF-8 string (indefinite length) + { + std::string result; + while (get() != 0xff) + { + check_eof(); + result.append(1, static_cast(current)); + } + return result; + } + + default: + { + std::stringstream ss; + ss << std::hex << current; + JSON_THROW(parse_error::create(113, chars_read, "expected a CBOR string; last byte: 0x" + ss.str())); + } + } + } + + void check_eof() + { + if (JSON_UNLIKELY(current == std::char_traits::eof())) + { + JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + } + } + + private: + /// input adapter + input_adapter* ia = nullptr; + + /// the current character + int current = std::char_traits::eof(); + + /// the number of characters read + size_t chars_read = 0; + }; + + public: + static basic_json from_cbor(const std::vector& v, + const size_t start_index = 0) + { + binary_reader br(reinterpret_cast(v.data() + start_index), v.size() - start_index); + return br.parse_cbor(); + } + ////////////////////// // lexer and parser // ////////////////////// + private: /*! 
@brief lexical analysis @@ -10292,200 +11009,6 @@ class basic_json */ class lexer { - private: - - /// abstract input adapter interface - class input_adapter - { - public: - virtual int get_character() = 0; - virtual std::string read(size_t offset, size_t length) = 0; - virtual ~input_adapter() {} - }; - - /// input adapter for cached stream input - class cached_input_stream_adapter : public input_adapter - { - public: - cached_input_stream_adapter(std::istream& i, const size_t buffer_size) - : is(i), start_position(is.tellg()), - buffer(buffer_size, std::char_traits::eof()) - { - // immediately abort if stream is erroneous - if (JSON_UNLIKELY(i.fail())) - { - JSON_THROW(parse_error::create(111, 0, "bad input stream")); - } - - // initial fill; unfilled buffer characters remain EOF - is.read(buffer.data(), static_cast(buffer.size())); - - // skip byte-order mark - assert(buffer.size() >= 3); - if (buffer[0] == '\xEF' and buffer[1] == '\xBB' and buffer[2] == '\xBF') - { - buffer_pos += 3; - processed_chars += 3; - } - } - - ~cached_input_stream_adapter() override - { - // clear stream flags - is.clear(); - // set stream after last processed char - is.seekg(start_position + static_cast(processed_chars - 1)); - } - - int get_character() override - { - // check if refilling is necessary - if (JSON_UNLIKELY(buffer_pos == buffer.size())) - { - // refill - is.read(reinterpret_cast(buffer.data()), static_cast(buffer.size())); - // set unfilled characters to EOF - std::fill_n(buffer.begin() + static_cast(is.gcount()), - buffer.size() - static_cast(is.gcount()), - std::char_traits::eof()); - // the buffer is ready - buffer_pos = 0; - } - - ++processed_chars; - return buffer[buffer_pos++]; - } - - std::string read(size_t offset, size_t length) override - { - // create buffer - std::string result(length, '\0'); - - // save stream position - auto current_pos = is.tellg(); - // save stream flags - auto flags = is.rdstate(); - - // clear stream flags - is.clear(); - // set stream 
position - is.seekg(static_cast(offset)); - // read bytes - is.read(&result[0], static_cast(length)); - - // reset stream position - is.seekg(current_pos); - // reset stream flags - is.setstate(flags); - - return result; - } - - private: - /// the associated input stream - std::istream& is; - - /// chars returned via get_character() - size_t processed_chars = 0; - /// chars processed in the current buffer - size_t buffer_pos = 0; - - /// position of the stream when we started - const std::streampos start_position; - - /// internal buffer - std::vector buffer; - }; - - /// input adapter for uncached stream input - class input_stream_adapter : public input_adapter - { - public: - input_stream_adapter(std::istream& i) - : is(i) - { - // immediately abort if stream is erroneous - if (i.fail()) - { - JSON_THROW(parse_error::create(111, 0, "bad input stream")); - } - } - - int get_character() override - { - return is.get(); - } - - std::string read(size_t offset, size_t length) override - { - // create buffer - std::string result(length, '\0'); - - // save stream position - auto current_pos = is.tellg(); - // save stream flags - auto flags = is.rdstate(); - - // clear stream flags - is.clear(); - // set stream position - is.seekg(offset); - // read bytes - is.read(&result[0], length); - - // reset stream position - is.seekg(current_pos); - // reset stream flags - is.setstate(flags); - - return result; - } - - private: - /// the associated input stream - std::istream& is; - }; - - /// input adapter for buffer input - class input_buffer_adapter : public input_adapter - { - public: - input_buffer_adapter(const char* b, size_t l) - : input_adapter(), cursor(b), limit(b + l), start(b) - {} - - // delete because of pointer members - input_buffer_adapter(const input_buffer_adapter&) = delete; - input_buffer_adapter& operator=(input_buffer_adapter&) = delete; - - int get_character() override - { - if (JSON_LIKELY(cursor < limit)) - { - return *cursor++; - } - else - { - return 
std::char_traits::eof(); - } - } - - std::string read(size_t offset, size_t length) override - { - // avoid reading too many characters - const size_t max_length = static_cast(limit - start); - return std::string(start + offset, std::min({length, max_length})); - } - - private: - /// pointer to the current character - const char* cursor; - /// pointer past the last character - const char* limit; - /// pointer to the first character - const char* start; - }; - public: /// token types for the parser enum class token_type @@ -11009,178 +11532,178 @@ class basic_json } // invalid control characters - case '\x00': - case '\x01': - case '\x02': - case '\x03': - case '\x04': - case '\x05': - case '\x06': - case '\x07': - case '\x08': - case '\x09': - case '\x0a': - case '\x0b': - case '\x0c': - case '\x0d': - case '\x0e': - case '\x0f': - case '\x10': - case '\x11': - case '\x12': - case '\x13': - case '\x14': - case '\x15': - case '\x16': - case '\x17': - case '\x18': - case '\x19': - case '\x1a': - case '\x1b': - case '\x1c': - case '\x1d': - case '\x1e': - case '\x1f': + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case 0x09: + case 0x0a: + case 0x0b: + case 0x0c: + case 0x0d: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: { error_message = "invalid string: control characters (U+0000 through U+001f) must be escaped"; return token_type::parse_error; } // U+0020..U+007F (except U+0022 (quote) and U+005C (backspace)) - case '\x20': - case '\x21': - case '\x23': - case '\x24': - case '\x25': - case '\x26': - case '\x27': - case '\x28': - case '\x29': - case '\x2a': - case '\x2b': - case '\x2c': - case '\x2d': - case '\x2e': - case '\x2f': - case '\x30': - case '\x31': - case '\x32': - case '\x33': - case '\x34': - case 
'\x35': - case '\x36': - case '\x37': - case '\x38': - case '\x39': - case '\x3a': - case '\x3b': - case '\x3c': - case '\x3d': - case '\x3e': - case '\x3f': - case '\x40': - case '\x41': - case '\x42': - case '\x43': - case '\x44': - case '\x45': - case '\x46': - case '\x47': - case '\x48': - case '\x49': - case '\x4a': - case '\x4b': - case '\x4c': - case '\x4d': - case '\x4e': - case '\x4f': - case '\x50': - case '\x51': - case '\x52': - case '\x53': - case '\x54': - case '\x55': - case '\x56': - case '\x57': - case '\x58': - case '\x59': - case '\x5a': - case '\x5b': - case '\x5d': - case '\x5e': - case '\x5f': - case '\x60': - case '\x61': - case '\x62': - case '\x63': - case '\x64': - case '\x65': - case '\x66': - case '\x67': - case '\x68': - case '\x69': - case '\x6a': - case '\x6b': - case '\x6c': - case '\x6d': - case '\x6e': - case '\x6f': - case '\x70': - case '\x71': - case '\x72': - case '\x73': - case '\x74': - case '\x75': - case '\x76': - case '\x77': - case '\x78': - case '\x79': - case '\x7a': - case '\x7b': - case '\x7c': - case '\x7d': - case '\x7e': - case '\x7f': + case 0x20: + case 0x21: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2a: + case 0x2b: + case 0x2c: + case 0x2d: + case 0x2e: + case 0x2f: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3a: + case 0x3b: + case 0x3c: + case 0x3d: + case 0x3e: + case 0x3f: + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4a: + case 0x4b: + case 0x4c: + case 0x4d: + case 0x4e: + case 0x4f: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: + case 0x59: + case 0x5a: + case 0x5b: + case 0x5d: + case 0x5e: + case 0x5f: + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: 
+ case 0x67: + case 0x68: + case 0x69: + case 0x6a: + case 0x6b: + case 0x6c: + case 0x6d: + case 0x6e: + case 0x6f: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + case 0x78: + case 0x79: + case 0x7a: + case 0x7b: + case 0x7c: + case 0x7d: + case 0x7e: + case 0x7f: { add(current); break; } // U+0080..U+07FF: bytes C2..DF 80..BF - case '\xc2': - case '\xc3': - case '\xc4': - case '\xc5': - case '\xc6': - case '\xc7': - case '\xc8': - case '\xc9': - case '\xca': - case '\xcb': - case '\xcc': - case '\xcd': - case '\xce': - case '\xcf': - case '\xd0': - case '\xd1': - case '\xd2': - case '\xd3': - case '\xd4': - case '\xd5': - case '\xd6': - case '\xd7': - case '\xd8': - case '\xd9': - case '\xda': - case '\xdb': - case '\xdc': - case '\xdd': - case '\xde': - case '\xdf': + case 0xc2: + case 0xc3: + case 0xc4: + case 0xc5: + case 0xc6: + case 0xc7: + case 0xc8: + case 0xc9: + case 0xca: + case 0xcb: + case 0xcc: + case 0xcd: + case 0xce: + case 0xcf: + case 0xd0: + case 0xd1: + case 0xd2: + case 0xd3: + case 0xd4: + case 0xd5: + case 0xd6: + case 0xd7: + case 0xd8: + case 0xd9: + case 0xda: + case 0xdb: + case 0xdc: + case 0xdd: + case 0xde: + case 0xdf: { add(current); get(); - if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { add(current); continue; @@ -11191,15 +11714,15 @@ class basic_json } // U+0800..U+0FFF: bytes E0 A0..BF 80..BF - case '\xe0': + case 0xe0: { add(current); get(); - if (JSON_LIKELY('\xa0' <= current and current <= '\xbf')) + if (JSON_LIKELY(0xa0 <= current and current <= 0xbf)) { add(current); get(); - if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { add(current); continue; @@ -11212,28 +11735,28 @@ class basic_json // U+1000..U+CFFF: bytes E1..EC 80..BF 80..BF // U+E000..U+FFFF: bytes EE..EF 80..BF 80..BF - case '\xe1': - case '\xe2': - case '\xe3': - case 
'\xe4': - case '\xe5': - case '\xe6': - case '\xe7': - case '\xe8': - case '\xe9': - case '\xea': - case '\xeb': - case '\xec': - case '\xee': - case '\xef': + case 0xe1: + case 0xe2: + case 0xe3: + case 0xe4: + case 0xe5: + case 0xe6: + case 0xe7: + case 0xe8: + case 0xe9: + case 0xea: + case 0xeb: + case 0xec: + case 0xee: + case 0xef: { add(current); get(); - if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { add(current); get(); - if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { add(current); continue; @@ -11245,15 +11768,15 @@ class basic_json } // U+D000..U+D7FF: bytes ED 80..9F 80..BF - case '\xed': + case 0xed: { add(current); get(); - if (JSON_LIKELY('\x80' <= current and current <= '\x9f')) + if (JSON_LIKELY(0x80 <= current and current <= 0x9f)) { add(current); get(); - if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { add(current); continue; @@ -11265,19 +11788,19 @@ class basic_json } // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF - case '\xf0': + case 0xf0: { add(current); get(); - if (JSON_LIKELY('\x90' <= current and current <= '\xbf')) + if (JSON_LIKELY(0x90 <= current and current <= 0xbf)) { add(current); get(); - if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { add(current); get(); - if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { add(current); continue; @@ -11290,21 +11813,21 @@ class basic_json } // U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF - case '\xf1': - case '\xf2': - case '\xf3': + case 0xf1: + case 0xf2: + case 0xf3: { add(current); get(); - if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { add(current); get(); - if (JSON_LIKELY('\x80' <= current 
and current <= '\xbf')) + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { add(current); get(); - if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { add(current); continue; @@ -11317,19 +11840,19 @@ class basic_json } // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF - case '\xf4': + case 0xf4: { add(current); get(); - if (JSON_LIKELY('\x80' <= current and current <= '\x8f')) + if (JSON_LIKELY(0x80 <= current and current <= 0x8f)) { add(current); get(); - if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { add(current); get(); - if (JSON_LIKELY('\x80' <= current and current <= '\xbf')) + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { add(current); continue; diff --git a/test/src/unit-cbor.cpp b/test/src/unit-cbor.cpp index f1609f4f..390a1b52 100644 --- a/test/src/unit-cbor.cpp +++ b/test/src/unit-cbor.cpp @@ -1166,35 +1166,35 @@ TEST_CASE("CBOR") CHECK_THROWS_AS(json::from_cbor(std::vector({0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})), json::parse_error); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x18})), - "[json.exception.parse_error.110] parse error at 2: cannot read 1 bytes from vector"); + "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x19})), - "[json.exception.parse_error.110] parse error at 2: cannot read 2 bytes from vector"); + "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x19, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 2 bytes from vector"); + "[json.exception.parse_error.110] parse error at 3: unexpected end of input"); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x1a})), - "[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector"); + "[json.exception.parse_error.110] parse error at 2: 
unexpected end of input"); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x1a, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector"); + "[json.exception.parse_error.110] parse error at 3: unexpected end of input"); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x1a, 0x00, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector"); + "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x1a, 0x00, 0x00, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector"); + "[json.exception.parse_error.110] parse error at 5: unexpected end of input"); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x1b})), - "[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector"); + "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x1b, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector"); + "[json.exception.parse_error.110] parse error at 3: unexpected end of input"); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x1b, 0x00, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector"); + "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x1b, 0x00, 0x00, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector"); + "[json.exception.parse_error.110] parse error at 5: unexpected end of input"); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x1b, 0x00, 0x00, 0x00, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector"); + "[json.exception.parse_error.110] parse error at 6: unexpected end of input"); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x1b, 0x00, 0x00, 
0x00, 0x00, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector"); + "[json.exception.parse_error.110] parse error at 7: unexpected end of input"); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector"); + "[json.exception.parse_error.110] parse error at 8: unexpected end of input"); CHECK_THROWS_WITH(json::from_cbor(std::vector({0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector"); + "[json.exception.parse_error.110] parse error at 9: unexpected end of input"); } SECTION("unsupported bytes") @@ -1756,7 +1756,7 @@ TEST_CASE("examples from RFC 7049 Appendix A") CHECK(json::parse("\"\\ud800\\udd51\"") == json::from_cbor(std::vector({0x64, 0xf0, 0x90, 0x85, 0x91}))); // indefinite length strings - CHECK(json::parse("\"streaming\"") == json::from_cbor(std::vector({0x7f, 0x65, 0x73, 0x74, 0x72, 0x65, 0x61, 0x64, 0x6d, 0x69, 0x6e, 0x67, 0xff}))); + CHECK(json::parse("\"streaming\"") == json::from_cbor(std::vector({0x7f, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0xff}))); } SECTION("arrays") diff --git a/test/src/unit-regression.cpp b/test/src/unit-regression.cpp index c221b1ab..fb4f03d1 100644 --- a/test/src/unit-regression.cpp +++ b/test/src/unit-regression.cpp @@ -629,7 +629,7 @@ TEST_CASE("regression tests") std::vector vec {0x65, 0xf5, 0x0a, 0x48, 0x21}; CHECK_THROWS_AS(json::from_cbor(vec), json::parse_error); CHECK_THROWS_WITH(json::from_cbor(vec), - "[json.exception.parse_error.110] parse error at 2: cannot read 5 bytes from vector"); + "[json.exception.parse_error.110] parse error at 6: unexpected end of input"); } SECTION("issue #407 - Heap-buffer-overflow (OSS-Fuzz issue 343)") @@ -650,19 +650,19 @@ TEST_CASE("regression tests") std::vector vec3 {0xf9, 0x8f}; CHECK_THROWS_AS(json::from_cbor(vec3), 
json::parse_error); CHECK_THROWS_WITH(json::from_cbor(vec3), - "[json.exception.parse_error.110] parse error at 2: cannot read 2 bytes from vector"); + "[json.exception.parse_error.110] parse error at 3: unexpected end of input"); // related test case: incomplete Single-Precision Float (CBOR) std::vector vec4 {0xfa, 0x8f, 0x0a}; CHECK_THROWS_AS(json::from_cbor(vec4), json::parse_error); CHECK_THROWS_WITH(json::from_cbor(vec4), - "[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector"); + "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); // related test case: incomplete Double-Precision Float (CBOR) std::vector vec5 {0xfb, 0x8f, 0x0a}; CHECK_THROWS_AS(json::from_cbor(vec5), json::parse_error); CHECK_THROWS_WITH(json::from_cbor(vec5), - "[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector"); + "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); } SECTION("issue #408 - Heap-buffer-overflow (OSS-Fuzz issue 344)") @@ -705,7 +705,7 @@ TEST_CASE("regression tests") std::vector vec2; CHECK_THROWS_AS(json::from_cbor(vec2), json::parse_error); CHECK_THROWS_WITH(json::from_cbor(vec2), - "[json.exception.parse_error.110] parse error at 1: cannot read 1 bytes from vector"); + "[json.exception.parse_error.110] parse error at 1: unexpected end of input"); CHECK_THROWS_AS(json::from_msgpack(vec2), json::parse_error); CHECK_THROWS_WITH(json::from_msgpack(vec2), "[json.exception.parse_error.110] parse error at 1: cannot read 1 bytes from vector"); @@ -717,19 +717,19 @@ TEST_CASE("regression tests") std::vector vec1 {0x7f}; CHECK_THROWS_AS(json::from_cbor(vec1), json::parse_error); CHECK_THROWS_WITH(json::from_cbor(vec1), - "[json.exception.parse_error.110] parse error at 2: cannot read 1 bytes from vector"); + "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); // related test case: empty array (indefinite length) std::vector vec2 {0x9f}; 
CHECK_THROWS_AS(json::from_cbor(vec2), json::parse_error); CHECK_THROWS_WITH(json::from_cbor(vec2), - "[json.exception.parse_error.110] parse error at 2: cannot read 1 bytes from vector"); + "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); // related test case: empty map (indefinite length) std::vector vec3 {0xbf}; CHECK_THROWS_AS(json::from_cbor(vec3), json::parse_error); CHECK_THROWS_WITH(json::from_cbor(vec3), - "[json.exception.parse_error.110] parse error at 2: cannot read 1 bytes from vector"); + "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); } SECTION("issue #412 - Heap-buffer-overflow (OSS-Fuzz issue 367)") @@ -763,19 +763,19 @@ TEST_CASE("regression tests") std::vector vec1 {0x7f, 0x61, 0x61}; CHECK_THROWS_AS(json::from_cbor(vec1), json::parse_error); CHECK_THROWS_WITH(json::from_cbor(vec1), - "[json.exception.parse_error.110] parse error at 4: cannot read 1 bytes from vector"); + "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); // related test case: nonempty array (indefinite length) std::vector vec2 {0x9f, 0x01}; CHECK_THROWS_AS(json::from_cbor(vec2), json::parse_error); CHECK_THROWS_WITH(json::from_cbor(vec2), - "[json.exception.parse_error.110] parse error at 3: cannot read 1 bytes from vector"); + "[json.exception.parse_error.110] parse error at 3: unexpected end of input"); // related test case: nonempty map (indefinite length) std::vector vec3 {0xbf, 0x61, 0x61, 0x01}; CHECK_THROWS_AS(json::from_cbor(vec3), json::parse_error); CHECK_THROWS_WITH(json::from_cbor(vec3), - "[json.exception.parse_error.110] parse error at 5: cannot read 1 bytes from vector"); + "[json.exception.parse_error.110] parse error at 5: unexpected end of input"); } SECTION("issue #414 - compare with literal 0)") From ea667db446649a9beb5689e6e5b860831d06323e Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 3 Apr 2017 21:53:23 +0200 Subject: [PATCH 23/44] :hammer: fixed compilation 
errors TIL about C++: executing m[key()]=val(); for a std::map executes First key() then val() with Clang First val() then key() with GCC #wat --- src/json.hpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index e96a53e3..9fadcae8 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -10722,7 +10722,8 @@ class basic_json for (size_t i = 0; i < len; ++i) { get(); - result[get_cbor_string()] = parse_cbor(); + auto key = get_cbor_string(); + result[key] = parse_cbor(); } return result; } @@ -10734,7 +10735,8 @@ class basic_json for (size_t i = 0; i < len; ++i) { get(); - result[get_cbor_string()] = parse_cbor(); + auto key = get_cbor_string(); + result[key] = parse_cbor(); } return result; } @@ -10746,7 +10748,8 @@ class basic_json for (size_t i = 0; i < len; ++i) { get(); - result[get_cbor_string()] = parse_cbor(); + auto key = get_cbor_string(); + result[key] = parse_cbor(); } return result; } @@ -10758,7 +10761,8 @@ class basic_json for (size_t i = 0; i < len; ++i) { get(); - result[get_cbor_string()] = parse_cbor(); + auto key = get_cbor_string(); + result[key] = parse_cbor(); } return result; } @@ -10770,7 +10774,8 @@ class basic_json for (size_t i = 0; i < len; ++i) { get(); - result[get_cbor_string()] = parse_cbor(); + auto key = get_cbor_string(); + result[key] = parse_cbor(); } return result; } @@ -10780,7 +10785,8 @@ class basic_json basic_json result = value_t::object; while (get() != 0xff) { - result[get_cbor_string()] = parse_cbor(); + auto key = get_cbor_string(); + result[key] = parse_cbor(); } return result; } From 782570d46fffc54ff1a98fc3045a7b9e509138b5 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 3 Apr 2017 22:58:46 +0200 Subject: [PATCH 24/44] :hammer: cleanup --- src/json.hpp | 683 +++++++-------------------------------------------- 1 file changed, 89 insertions(+), 594 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 9fadcae8..91a83731 100644 --- a/src/json.hpp 
+++ b/src/json.hpp @@ -8397,505 +8397,6 @@ class basic_json } } - /*! - @brief create a JSON value from a given CBOR vector - - @param[in] v CBOR serialization - @param[in] idx byte index to start reading from @a v - - @return deserialized JSON value - - @throw parse_error.110 if the given vector ends prematurely - @throw parse_error.112 if unsupported features from CBOR were - used in the given vector @a v or if the input is not valid CBOR - @throw parse_error.113 if a string was expected as map key, but not found - - @sa https://tools.ietf.org/html/rfc7049 - */ - static basic_json from_cbor_internal(const std::vector& v, size_t& idx) - { - // store and increment index - const size_t current_idx = idx++; - - // make sure reading 1 byte is safe - check_length(v.size(), 1, current_idx); - - switch (v[current_idx]) - { - // Integer 0x00..0x17 (0..23) - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x08: - case 0x09: - case 0x0a: - case 0x0b: - case 0x0c: - case 0x0d: - case 0x0e: - case 0x0f: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - { - return v[current_idx]; - } - - case 0x18: // Unsigned integer (one-byte uint8_t follows) - { - idx += 1; // skip content byte - return get_from_vector(v, current_idx); - } - - case 0x19: // Unsigned integer (two-byte uint16_t follows) - { - idx += 2; // skip 2 content bytes - return get_from_vector(v, current_idx); - } - - case 0x1a: // Unsigned integer (four-byte uint32_t follows) - { - idx += 4; // skip 4 content bytes - return get_from_vector(v, current_idx); - } - - case 0x1b: // Unsigned integer (eight-byte uint64_t follows) - { - idx += 8; // skip 8 content bytes - return get_from_vector(v, current_idx); - } - - // Negative integer -1-0x00..-1-0x17 (-1..-24) - case 0x20: - case 0x21: - case 0x22: - case 0x23: - case 0x24: - case 0x25: - case 0x26: - case 0x27: - case 0x28: - case 0x29: - case 0x2a: - 
case 0x2b: - case 0x2c: - case 0x2d: - case 0x2e: - case 0x2f: - case 0x30: - case 0x31: - case 0x32: - case 0x33: - case 0x34: - case 0x35: - case 0x36: - case 0x37: - { - return static_cast(0x20 - 1 - v[current_idx]); - } - - case 0x38: // Negative integer (one-byte uint8_t follows) - { - idx += 1; // skip content byte - // must be uint8_t ! - return static_cast(-1) - get_from_vector(v, current_idx); - } - - case 0x39: // Negative integer -1-n (two-byte uint16_t follows) - { - idx += 2; // skip 2 content bytes - return static_cast(-1) - get_from_vector(v, current_idx); - } - - case 0x3a: // Negative integer -1-n (four-byte uint32_t follows) - { - idx += 4; // skip 4 content bytes - return static_cast(-1) - get_from_vector(v, current_idx); - } - - case 0x3b: // Negative integer -1-n (eight-byte uint64_t follows) - { - idx += 8; // skip 8 content bytes - return static_cast(-1) - static_cast(get_from_vector(v, current_idx)); - } - - // UTF-8 string (0x00..0x17 bytes follow) - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6a: - case 0x6b: - case 0x6c: - case 0x6d: - case 0x6e: - case 0x6f: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - { - const auto len = static_cast(v[current_idx] - 0x60); - const size_t offset = current_idx + 1; - idx += len; // skip content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - { - const auto len = static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 2; - idx += len + 1; // skip size byte + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - { - const auto len = static_cast(get_from_vector(v, 
current_idx)); - const size_t offset = current_idx + 3; - idx += len + 2; // skip 2 size bytes + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0x7a: // UTF-8 string (four-byte uint32_t for n follow) - { - const auto len = static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 5; - idx += len + 4; // skip 4 size bytes + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0x7b: // UTF-8 string (eight-byte uint64_t for n follow) - { - const auto len = static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 9; - idx += len + 8; // skip 8 size bytes + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0x7f: // UTF-8 string (indefinite length) - { - std::string result; - while (static_cast(check_length(v.size(), 1, idx)), v[idx] != 0xff) - { - string_t s = from_cbor_internal(v, idx); - result += s; - } - // skip break byte (0xFF) - idx += 1; - return result; - } - - // array (0x00..0x17 data items follow) - case 0x80: - case 0x81: - case 0x82: - case 0x83: - case 0x84: - case 0x85: - case 0x86: - case 0x87: - case 0x88: - case 0x89: - case 0x8a: - case 0x8b: - case 0x8c: - case 0x8d: - case 0x8e: - case 0x8f: - case 0x90: - case 0x91: - case 0x92: - case 0x93: - case 0x94: - case 0x95: - case 0x96: - case 0x97: - { - basic_json result = value_t::array; - const auto len = static_cast(v[current_idx] - 0x80); - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_cbor_internal(v, idx)); - } - return result; - } - - case 0x98: // array (one-byte uint8_t for n follows) - { - basic_json result = value_t::array; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 1; // skip 1 size byte - for (size_t i = 0; i < len; ++i) - { - 
result.push_back(from_cbor_internal(v, idx)); - } - return result; - } - - case 0x99: // array (two-byte uint16_t for n follow) - { - basic_json result = value_t::array; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 2; // skip 4 size bytes - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_cbor_internal(v, idx)); - } - return result; - } - - case 0x9a: // array (four-byte uint32_t for n follow) - { - basic_json result = value_t::array; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 4; // skip 4 size bytes - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_cbor_internal(v, idx)); - } - return result; - } - - case 0x9b: // array (eight-byte uint64_t for n follow) - { - basic_json result = value_t::array; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 8; // skip 8 size bytes - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_cbor_internal(v, idx)); - } - return result; - } - - case 0x9f: // array (indefinite length) - { - basic_json result = value_t::array; - while (static_cast(check_length(v.size(), 1, idx)), v[idx] != 0xff) - { - result.push_back(from_cbor_internal(v, idx)); - } - // skip break byte (0xFF) - idx += 1; - return result; - } - - // map (0x00..0x17 pairs of data items follow) - case 0xa0: - case 0xa1: - case 0xa2: - case 0xa3: - case 0xa4: - case 0xa5: - case 0xa6: - case 0xa7: - case 0xa8: - case 0xa9: - case 0xaa: - case 0xab: - case 0xac: - case 0xad: - case 0xae: - case 0xaf: - case 0xb0: - case 0xb1: - case 0xb2: - case 0xb3: - case 0xb4: - case 0xb5: - case 0xb6: - case 0xb7: - { - basic_json result = value_t::object; - const auto len = static_cast(v[current_idx] - 0xa0); - for (size_t i = 0; i < len; ++i) - { - cbor_expect_string(v, idx); - std::string key = from_cbor_internal(v, idx); - result[key] = from_cbor_internal(v, idx); - } - return result; - } - - case 0xb8: // map (one-byte uint8_t for n follows) - { - basic_json result 
= value_t::object; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 1; // skip 1 size byte - for (size_t i = 0; i < len; ++i) - { - cbor_expect_string(v, idx); - std::string key = from_cbor_internal(v, idx); - result[key] = from_cbor_internal(v, idx); - } - return result; - } - - case 0xb9: // map (two-byte uint16_t for n follow) - { - basic_json result = value_t::object; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 2; // skip 2 size bytes - for (size_t i = 0; i < len; ++i) - { - cbor_expect_string(v, idx); - std::string key = from_cbor_internal(v, idx); - result[key] = from_cbor_internal(v, idx); - } - return result; - } - - case 0xba: // map (four-byte uint32_t for n follow) - { - basic_json result = value_t::object; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 4; // skip 4 size bytes - for (size_t i = 0; i < len; ++i) - { - cbor_expect_string(v, idx); - std::string key = from_cbor_internal(v, idx); - result[key] = from_cbor_internal(v, idx); - } - return result; - } - - case 0xbb: // map (eight-byte uint64_t for n follow) - { - basic_json result = value_t::object; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 8; // skip 8 size bytes - for (size_t i = 0; i < len; ++i) - { - cbor_expect_string(v, idx); - std::string key = from_cbor_internal(v, idx); - result[key] = from_cbor_internal(v, idx); - } - return result; - } - - case 0xbf: // map (indefinite length) - { - basic_json result = value_t::object; - while (static_cast(check_length(v.size(), 1, idx)), v[idx] != 0xff) - { - cbor_expect_string(v, idx); - std::string key = from_cbor_internal(v, idx); - result[key] = from_cbor_internal(v, idx); - } - // skip break byte (0xFF) - idx += 1; - return result; - } - - case 0xf4: // false - { - return false; - } - - case 0xf5: // true - { - return true; - } - - case 0xf6: // null - { - return value_t::null; - } - - case 0xf9: // Half-Precision Float (two-byte IEEE 
754) - { - idx += 2; // skip two content bytes - - // code from RFC 7049, Appendix D, Figure 3: - // As half-precision floating-point numbers were only added to - // IEEE 754 in 2008, today's programming platforms often still - // only have limited support for them. It is very easy to - // include at least decoding support for them even without such - // support. An example of a small decoder for half-precision - // floating-point numbers in the C language is shown in Fig. 3. - check_length(v.size(), 2, current_idx + 1); - const int half = (v[current_idx + 1] << 8) + v[current_idx + 2]; - const int exp = (half >> 10) & 0x1f; - const int mant = half & 0x3ff; - double val; - if (exp == 0) - { - val = std::ldexp(mant, -24); - } - else if (exp != 31) - { - val = std::ldexp(mant + 1024, exp - 25); - } - else - { - val = mant == 0 - ? std::numeric_limits::infinity() - : std::numeric_limits::quiet_NaN(); - } - return (half & 0x8000) != 0 ? -val : val; - } - - case 0xfa: // Single-Precision Float (four-byte IEEE 754) - { - // copy bytes in reverse order into the float variable - float res; - check_length(v.size(), sizeof(float), current_idx + 1); - for (size_t byte = 0; byte < sizeof(float); ++byte) - { - reinterpret_cast(&res)[sizeof(float) - byte - 1] = v[current_idx + 1 + byte]; - } - idx += sizeof(float); // skip content bytes - return res; - } - - case 0xfb: // Double-Precision Float (eight-byte IEEE 754) - { - // copy bytes in reverse order into the double variable - double res; - check_length(v.size(), sizeof(double), current_idx + 1); - for (size_t byte = 0; byte < sizeof(double); ++byte) - { - reinterpret_cast(&res)[sizeof(double) - byte - 1] = v[current_idx + 1 + byte]; - } - idx += sizeof(double); // skip content bytes - return res; - } - - default: // anything else (0xFF is handled inside the other types) - { - std::stringstream ss; - ss << std::hex << static_cast(v[current_idx]); - JSON_THROW(parse_error::create(112, current_idx + 1, "error reading CBOR; last 
byte: 0x" + ss.str())); - } - } - } - public: /*! @brief create a MessagePack serialization of a given JSON value @@ -9053,7 +8554,7 @@ class basic_json } /*! - @brief create a MessagePack serialization of a given JSON value + @brief create a CBOR serialization of a given JSON value Serializes a given JSON value @a j to a byte vector using the CBOR (Concise Binary Object Representation) serialization format. CBOR is a binary @@ -9141,100 +8642,6 @@ class basic_json return result; } - /*! - @brief create a JSON value from a byte vector in CBOR format - - Deserializes a given byte vector @a v to a JSON value using the CBOR - (Concise Binary Object Representation) serialization format. - - The library maps CBOR types to JSON value types as follows: - - CBOR type | JSON value type | first byte - ---------------------- | --------------- | ---------- - Integer | number_unsigned | 0x00..0x17 - Unsigned integer | number_unsigned | 0x18 - Unsigned integer | number_unsigned | 0x19 - Unsigned integer | number_unsigned | 0x1a - Unsigned integer | number_unsigned | 0x1b - Negative integer | number_integer | 0x20..0x37 - Negative integer | number_integer | 0x38 - Negative integer | number_integer | 0x39 - Negative integer | number_integer | 0x3a - Negative integer | number_integer | 0x3b - Negative integer | number_integer | 0x40..0x57 - UTF-8 string | string | 0x60..0x77 - UTF-8 string | string | 0x78 - UTF-8 string | string | 0x79 - UTF-8 string | string | 0x7a - UTF-8 string | string | 0x7b - UTF-8 string | string | 0x7f - array | array | 0x80..0x97 - array | array | 0x98 - array | array | 0x99 - array | array | 0x9a - array | array | 0x9b - array | array | 0x9f - map | object | 0xa0..0xb7 - map | object | 0xb8 - map | object | 0xb9 - map | object | 0xba - map | object | 0xbb - map | object | 0xbf - False | `false` | 0xf4 - True | `true` | 0xf5 - Nill | `null` | 0xf6 - Half-Precision Float | number_float | 0xf9 - Single-Precision Float | number_float | 0xfa - Double-Precision 
Float | number_float | 0xfb - - @warning The mapping is **incomplete** in the sense that not all CBOR - types can be converted to a JSON value. The following CBOR types - are not supported and will yield parse errors (parse_error.112): - - byte strings (0x40..0x5f) - - date/time (0xc0..0xc1) - - bignum (0xc2..0xc3) - - decimal fraction (0xc4) - - bigfloat (0xc5) - - tagged items (0xc6..0xd4, 0xd8..0xdb) - - expected conversions (0xd5..0xd7) - - simple values (0xe0..0xf3, 0xf8) - - undefined (0xf7) - - @warning CBOR allows map keys of any type, whereas JSON only allows - strings as keys in object values. Therefore, CBOR maps with keys - other than UTF-8 strings are rejected (parse_error.113). - - @note Any CBOR output created @ref to_cbor can be successfully parsed by - @ref from_cbor. - - @param[in] v a byte vector in CBOR format - @param[in] start_index the index to start reading from @a v (0 by default) - @return deserialized JSON value - - @throw parse_error.110 if the given vector ends prematurely - @throw parse_error.112 if unsupported features from CBOR were - used in the given vector @a v or if the input is not valid CBOR - @throw parse_error.113 if a string was expected as map key, but not found - - @complexity Linear in the size of the byte vector @a v. - - @liveexample{The example shows the deserialization of a byte vector in CBOR - format to a JSON value.,from_cbor} - - @sa http://cbor.io - @sa @ref to_cbor(const basic_json&) for the analogous serialization - @sa @ref from_msgpack(const std::vector&, const size_t) for the - related MessagePack format - - @since version 2.0.9, parameter @a start_index since 2.1.1 - */ - //static basic_json from_cbor(const std::vector& v, - // const size_t start_index = 0) - //{ - // size_t i = start_index; - // return from_cbor_internal(v, i); - //} - /// @} /////////////////////////// @@ -10996,6 +10403,94 @@ class basic_json }; public: + + /*! 
+ @brief create a JSON value from a byte vector in CBOR format + + Deserializes a given byte vector @a v to a JSON value using the CBOR + (Concise Binary Object Representation) serialization format. + + The library maps CBOR types to JSON value types as follows: + + CBOR type | JSON value type | first byte + ---------------------- | --------------- | ---------- + Integer | number_unsigned | 0x00..0x17 + Unsigned integer | number_unsigned | 0x18 + Unsigned integer | number_unsigned | 0x19 + Unsigned integer | number_unsigned | 0x1a + Unsigned integer | number_unsigned | 0x1b + Negative integer | number_integer | 0x20..0x37 + Negative integer | number_integer | 0x38 + Negative integer | number_integer | 0x39 + Negative integer | number_integer | 0x3a + Negative integer | number_integer | 0x3b + Negative integer | number_integer | 0x40..0x57 + UTF-8 string | string | 0x60..0x77 + UTF-8 string | string | 0x78 + UTF-8 string | string | 0x79 + UTF-8 string | string | 0x7a + UTF-8 string | string | 0x7b + UTF-8 string | string | 0x7f + array | array | 0x80..0x97 + array | array | 0x98 + array | array | 0x99 + array | array | 0x9a + array | array | 0x9b + array | array | 0x9f + map | object | 0xa0..0xb7 + map | object | 0xb8 + map | object | 0xb9 + map | object | 0xba + map | object | 0xbb + map | object | 0xbf + False | `false` | 0xf4 + True | `true` | 0xf5 + Nil | `null` | 0xf6 + Half-Precision Float | number_float | 0xf9 + Single-Precision Float | number_float | 0xfa + Double-Precision Float | number_float | 0xfb + + @warning The mapping is **incomplete** in the sense that not all CBOR + types can be converted to a JSON value.
The following CBOR types + are not supported and will yield parse errors (parse_error.112): + - byte strings (0x40..0x5f) + - date/time (0xc0..0xc1) + - bignum (0xc2..0xc3) + - decimal fraction (0xc4) + - bigfloat (0xc5) + - tagged items (0xc6..0xd4, 0xd8..0xdb) + - expected conversions (0xd5..0xd7) + - simple values (0xe0..0xf3, 0xf8) + - undefined (0xf7) + + @warning CBOR allows map keys of any type, whereas JSON only allows + strings as keys in object values. Therefore, CBOR maps with keys + other than UTF-8 strings are rejected (parse_error.113). + + @note Any CBOR output created by @ref to_cbor can be successfully parsed by + @ref from_cbor. + + @param[in] v a byte vector in CBOR format + @param[in] start_index the index to start reading from @a v (0 by default) + @return deserialized JSON value + + @throw parse_error.110 if the given vector ends prematurely + @throw parse_error.112 if unsupported features from CBOR were + used in the given vector @a v or if the input is not valid CBOR + @throw parse_error.113 if a string was expected as map key, but not found + + @complexity Linear in the size of the byte vector @a v.
+ + @liveexample{The example shows the deserialization of a byte vector in CBOR + format to a JSON value.,from_cbor} + + @sa http://cbor.io + @sa @ref to_cbor(const basic_json&) for the analogous serialization + @sa @ref from_msgpack(const std::vector&, const size_t) for the + related MessagePack format + + @since version 2.0.9, parameter @a start_index since 2.1.1 + */ static basic_json from_cbor(const std::vector& v, const size_t start_index = 0) { From b15fc13dd194651a5acce92952ab3c09e0a39722 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 4 Apr 2017 16:59:19 +0200 Subject: [PATCH 25/44] :hammer: implemented MessagePack in binary_reader --- src/json.hpp | 1024 ++++++++++++++++++---------------- test/src/unit-cbor.cpp | 7 - test/src/unit-msgpack.cpp | 30 +- test/src/unit-regression.cpp | 8 +- 4 files changed, 567 insertions(+), 502 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index e6fbabdf..6853d8c0 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -7480,57 +7480,6 @@ class basic_json } } - /*! - @brief take sufficient bytes from a vector to fill an integer variable - - In the context of binary serialization formats, we need to read several - bytes from a byte vector and combine them to multi-byte integral data - types. - - @param[in] vec byte vector to read from - @param[in] current_index the position in the vector after which to read - - @return the next sizeof(T) bytes from @a vec, in reverse order as T - - @tparam T the integral return type - - @throw parse_error.110 if there are less than sizeof(T)+1 bytes in the - vector @a vec to read - - In the for loop, the bytes from the vector are copied in reverse order into - the return value. In the figures below, let sizeof(T)=4 and `i` be the loop - variable. - - Precondition: - - vec: | | | a | b | c | d | T: | | | | | - ^ ^ ^ ^ - current_index i ptr sizeof(T) - - Postcondition: - - vec: | | | a | b | c | d | T: | d | c | b | a | - ^ ^ ^ - | i ptr - current_index - - @sa Code adapted from . 
- */ - template - static T get_from_vector(const std::vector& vec, const size_t current_index) - { - // check if we can read sizeof(T) bytes starting the next index - check_length(vec.size(), sizeof(T), current_index + 1); - - T result; - auto* ptr = reinterpret_cast(&result); - for (size_t i = 0; i < sizeof(T); ++i) - { - *ptr++ = vec[current_index + sizeof(T) - i]; - } - return result; - } - /*! @brief create a MessagePack serialization of a given JSON value @@ -8041,357 +7990,6 @@ class basic_json } } - - /* - @brief checks if given lengths do not exceed the size of a given vector - - To secure the access to the byte vector during CBOR/MessagePack - deserialization, bytes are copied from the vector into buffers. This - function checks if the number of bytes to copy (@a len) does not exceed - the size @s size of the vector. Additionally, an @a offset is given from - where to start reading the bytes. - - This function checks whether reading the bytes is safe; that is, offset is - a valid index in the vector, offset+len - - @param[in] size size of the byte vector - @param[in] len number of bytes to read - @param[in] offset offset where to start reading - - vec: x x x x x X X X X X - ^ ^ ^ - 0 offset len - - @throws out_of_range if `len > v.size()` - */ - static void check_length(const size_t size, const size_t len, const size_t offset) - { - // simple case: requested length is greater than the vector's length - if (len > size or offset > size) - { - JSON_THROW(parse_error::create(110, offset + 1, "cannot read " + std::to_string(len) + " bytes from vector")); - } - - // second case: adding offset would result in overflow - if ((size > ((std::numeric_limits::max)() - offset))) - { - JSON_THROW(parse_error::create(110, offset + 1, "cannot read " + std::to_string(len) + " bytes from vector")); - } - - // last case: reading past the end of the vector - if (len + offset > size) - { - JSON_THROW(parse_error::create(110, offset + 1, "cannot read " + std::to_string(len) + " 
bytes from vector")); - } - } - - /*! - @brief check if the next byte belongs to a string - - While parsing a map, the keys must be strings. This function checks if the - current byte is one of the start bytes for a string in MessagePack: - - - 0xa0 - 0xbf: fixstr - - 0xd9: str 8 - - 0xda: str 16 - - 0xdb: str 32 - - @param[in] v MessagePack serialization - @param[in] idx byte index in @a v to check for a string - - @throw parse_error.113 if `v[idx]` does not belong to a string - */ - static void msgpack_expect_string(const std::vector& v, size_t idx) - { - check_length(v.size(), 1, idx); - - const auto byte = v[idx]; - if ((byte >= 0xa0 and byte <= 0xbf) or (byte >= 0xd9 and byte <= 0xdb)) - { - return; - } - - std::stringstream ss; - ss << std::hex << static_cast(v[idx]); - JSON_THROW(parse_error::create(113, idx + 1, "expected a MessagePack string; last byte: 0x" + ss.str())); - } - - /*! - @brief check if the next byte belongs to a string - - While parsing a map, the keys must be strings. This function checks if the - current byte is one of the start bytes for a string in CBOR: - - - 0x60 - 0x77: fixed length - - 0x78 - 0x7b: variable length - - 0x7f: indefinity length - - @param[in] v CBOR serialization - @param[in] idx byte index in @a v to check for a string - - @throw parse_error.113 if `v[idx]` does not belong to a string - */ - static void cbor_expect_string(const std::vector& v, size_t idx) - { - check_length(v.size(), 1, idx); - - const auto byte = v[idx]; - if ((byte >= 0x60 and byte <= 0x7b) or byte == 0x7f) - { - return; - } - - std::stringstream ss; - ss << std::hex << static_cast(v[idx]); - JSON_THROW(parse_error::create(113, idx + 1, "expected a CBOR string; last byte: 0x" + ss.str())); - } - - /*! 
- @brief create a JSON value from a given MessagePack vector - - @param[in] v MessagePack serialization - @param[in] idx byte index to start reading from @a v - - @return deserialized JSON value - - @throw parse_error.110 if the given vector ends prematurely - @throw parse_error.112 if unsupported features from MessagePack were - used in the given vector @a v or if the input is not valid MessagePack - @throw parse_error.113 if a string was expected as map key, but not found - - @sa https://github.com/msgpack/msgpack/blob/master/spec.md - */ - static basic_json from_msgpack_internal(const std::vector& v, size_t& idx) - { - // store and increment index - const size_t current_idx = idx++; - - // make sure reading 1 byte is safe - check_length(v.size(), 1, current_idx); - - if (v[current_idx] <= 0xbf) - { - if (v[current_idx] <= 0x7f) // positive fixint - { - return v[current_idx]; - } - if (v[current_idx] <= 0x8f) // fixmap - { - basic_json result = value_t::object; - const size_t len = v[current_idx] & 0x0f; - for (size_t i = 0; i < len; ++i) - { - msgpack_expect_string(v, idx); - std::string key = from_msgpack_internal(v, idx); - result[key] = from_msgpack_internal(v, idx); - } - return result; - } - else if (v[current_idx] <= 0x9f) // fixarray - { - basic_json result = value_t::array; - const size_t len = v[current_idx] & 0x0f; - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_msgpack_internal(v, idx)); - } - return result; - } - else // fixstr - { - const size_t len = v[current_idx] & 0x1f; - const size_t offset = current_idx + 1; - idx += len; // skip content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - } - else if (v[current_idx] >= 0xe0) // negative fixint - { - return static_cast(v[current_idx]); - } - else - { - switch (v[current_idx]) - { - case 0xc0: // nil - { - return value_t::null; - } - - case 0xc2: // false - { - return false; - } - - case 0xc3: // true - { - return 
true; - } - - case 0xca: // float 32 - { - // copy bytes in reverse order into the double variable - float res; - check_length(v.size(), sizeof(float), current_idx + 1); - for (size_t byte = 0; byte < sizeof(float); ++byte) - { - reinterpret_cast(&res)[sizeof(float) - byte - 1] = v[current_idx + 1 + byte]; - } - idx += sizeof(float); // skip content bytes - return res; - } - - case 0xcb: // float 64 - { - // copy bytes in reverse order into the double variable - double res; - check_length(v.size(), sizeof(double), current_idx + 1); - for (size_t byte = 0; byte < sizeof(double); ++byte) - { - reinterpret_cast(&res)[sizeof(double) - byte - 1] = v[current_idx + 1 + byte]; - } - idx += sizeof(double); // skip content bytes - return res; - } - - case 0xcc: // uint 8 - { - idx += 1; // skip content byte - return get_from_vector(v, current_idx); - } - - case 0xcd: // uint 16 - { - idx += 2; // skip 2 content bytes - return get_from_vector(v, current_idx); - } - - case 0xce: // uint 32 - { - idx += 4; // skip 4 content bytes - return get_from_vector(v, current_idx); - } - - case 0xcf: // uint 64 - { - idx += 8; // skip 8 content bytes - return get_from_vector(v, current_idx); - } - - case 0xd0: // int 8 - { - idx += 1; // skip content byte - return get_from_vector(v, current_idx); - } - - case 0xd1: // int 16 - { - idx += 2; // skip 2 content bytes - return get_from_vector(v, current_idx); - } - - case 0xd2: // int 32 - { - idx += 4; // skip 4 content bytes - return get_from_vector(v, current_idx); - } - - case 0xd3: // int 64 - { - idx += 8; // skip 8 content bytes - return get_from_vector(v, current_idx); - } - - case 0xd9: // str 8 - { - const auto len = static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 2; - idx += len + 1; // skip size byte + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0xda: // str 16 - { - const auto len = 
static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 3; - idx += len + 2; // skip 2 size bytes + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0xdb: // str 32 - { - const auto len = static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 5; - idx += len + 4; // skip 4 size bytes + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0xdc: // array 16 - { - basic_json result = value_t::array; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 2; // skip 2 size bytes - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_msgpack_internal(v, idx)); - } - return result; - } - - case 0xdd: // array 32 - { - basic_json result = value_t::array; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 4; // skip 4 size bytes - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_msgpack_internal(v, idx)); - } - return result; - } - - case 0xde: // map 16 - { - basic_json result = value_t::object; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 2; // skip 2 size bytes - for (size_t i = 0; i < len; ++i) - { - msgpack_expect_string(v, idx); - std::string key = from_msgpack_internal(v, idx); - result[key] = from_msgpack_internal(v, idx); - } - return result; - } - - case 0xdf: // map 32 - { - basic_json result = value_t::object; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 4; // skip 4 size bytes - for (size_t i = 0; i < len; ++i) - { - msgpack_expect_string(v, idx); - std::string key = from_msgpack_internal(v, idx); - result[key] = from_msgpack_internal(v, idx); - } - return result; - } - - default: - { - std::stringstream ss; - ss << std::hex << static_cast(v[current_idx]); - JSON_THROW(parse_error::create(112, current_idx + 1, "error reading 
MessagePack; last byte: 0x" + ss.str())); - } - } - } - } - public: /*! @brief create a MessagePack serialization of a given JSON value @@ -8474,80 +8072,6 @@ class basic_json return result; } - /*! - @brief create a JSON value from a byte vector in MessagePack format - - Deserializes a given byte vector @a v to a JSON value using the MessagePack - serialization format. - - The library maps MessagePack types to JSON value types as follows: - - MessagePack type | JSON value type | first byte - ---------------- | --------------- | ---------- - positive fixint | number_unsigned | 0x00..0x7f - fixmap | object | 0x80..0x8f - fixarray | array | 0x90..0x9f - fixstr | string | 0xa0..0xbf - nil | `null` | 0xc0 - false | `false` | 0xc2 - true | `true` | 0xc3 - float 32 | number_float | 0xca - float 64 | number_float | 0xcb - uint 8 | number_unsigned | 0xcc - uint 16 | number_unsigned | 0xcd - uint 32 | number_unsigned | 0xce - uint 64 | number_unsigned | 0xcf - int 8 | number_integer | 0xd0 - int 16 | number_integer | 0xd1 - int 32 | number_integer | 0xd2 - int 64 | number_integer | 0xd3 - str 8 | string | 0xd9 - str 16 | string | 0xda - str 32 | string | 0xdb - array 16 | array | 0xdc - array 32 | array | 0xdd - map 16 | object | 0xde - map 32 | object | 0xdf - negative fixint | number_integer | 0xe0-0xff - - @warning The mapping is **incomplete** in the sense that not all - MessagePack types can be converted to a JSON value. The following - MessagePack types are not supported and will yield parse errors: - - bin 8 - bin 32 (0xc4..0xc6) - - ext 8 - ext 32 (0xc7..0xc9) - - fixext 1 - fixext 16 (0xd4..0xd8) - - @note Any MessagePack output created @ref to_msgpack can be successfully - parsed by @ref from_msgpack. 
- - @param[in] v a byte vector in MessagePack format - @param[in] start_index the index to start reading from @a v (0 by default) - @return deserialized JSON value - - @throw parse_error.110 if the given vector ends prematurely - @throw parse_error.112 if unsupported features from MessagePack were - used in the given vector @a v or if the input is not valid MessagePack - @throw parse_error.113 if a string was expected as map key, but not found - - @complexity Linear in the size of the byte vector @a v. - - @liveexample{The example shows the deserialization of a byte vector in - MessagePack format to a JSON value.,from_msgpack} - - @sa http://msgpack.org - @sa @ref to_msgpack(const basic_json&) for the analogous serialization - @sa @ref from_cbor(const std::vector&, const size_t) for the - related CBOR format - - @since version 2.0.9, parameter @a start_index since 2.1.1 - */ - static basic_json from_msgpack(const std::vector& v, - const size_t start_index = 0) - { - size_t i = start_index; - return from_msgpack_internal(v, i); - } - /*! 
@brief create a CBOR serialization of a given JSON value @@ -10262,6 +9786,408 @@ class basic_json } } + basic_json parse_msgpack() + { + switch (get()) + { + // EOF + case std::char_traits::eof(): + { + JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + } + + // positive fixint + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case 0x09: + case 0x0a: + case 0x0b: + case 0x0c: + case 0x0d: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + case 0x20: + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2a: + case 0x2b: + case 0x2c: + case 0x2d: + case 0x2e: + case 0x2f: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3a: + case 0x3b: + case 0x3c: + case 0x3d: + case 0x3e: + case 0x3f: + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4a: + case 0x4b: + case 0x4c: + case 0x4d: + case 0x4e: + case 0x4f: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: + case 0x59: + case 0x5a: + case 0x5b: + case 0x5c: + case 0x5d: + case 0x5e: + case 0x5f: + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6a: + case 0x6b: + case 0x6c: + case 0x6d: + case 0x6e: + case 0x6f: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + case 0x78: + case 0x79: + case 0x7a: + case 0x7b: + case 0x7c: + case 0x7d: + case 0x7e: + case 0x7f: + { + return 
static_cast(current); + } + + // fixmap + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8a: + case 0x8b: + case 0x8c: + case 0x8d: + case 0x8e: + case 0x8f: + { + basic_json result = value_t::object; + const auto len = static_cast(current & 0x0f); + for (size_t i = 0; i < len; ++i) + { + get(); + auto key = get_msgpack_string(); + result[key] = parse_msgpack(); + } + return result; + } + + // fixarray + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9a: + case 0x9b: + case 0x9c: + case 0x9d: + case 0x9e: + case 0x9f: + { + basic_json result = value_t::array; + const auto len = static_cast(current & 0x0f); + for (size_t i = 0; i < len; ++i) + { + result.push_back(parse_msgpack()); + } + return result; + } + + // fixstr + case 0xa0: + case 0xa1: + case 0xa2: + case 0xa3: + case 0xa4: + case 0xa5: + case 0xa6: + case 0xa7: + case 0xa8: + case 0xa9: + case 0xaa: + case 0xab: + case 0xac: + case 0xad: + case 0xae: + case 0xaf: + case 0xb0: + case 0xb1: + case 0xb2: + case 0xb3: + case 0xb4: + case 0xb5: + case 0xb6: + case 0xb7: + case 0xb8: + case 0xb9: + case 0xba: + case 0xbb: + case 0xbc: + case 0xbd: + case 0xbe: + case 0xbf: + { + return get_msgpack_string(); + } + + case 0xc0: // nil + { + return value_t::null; + } + + case 0xc2: // false + { + return false; + } + + case 0xc3: // true + { + return true; + } + + case 0xca: // float 32 + { + return get_number(); + } + + case 0xcb: // float 64 + { + return get_number(); + } + + case 0xcc: // uint 8 + { + return get_number(); + } + + case 0xcd: // uint 16 + { + return get_number(); + } + + case 0xce: // uint 32 + { + return get_number(); + } + + case 0xcf: // uint 64 + { + return get_number(); + } + + case 0xd0: // int 8 + { + return get_number(); + } + + case 0xd1: // int 16 + { + return get_number(); + } + + case 0xd2: // int 32 + { + 
return get_number(); + } + + case 0xd3: // int 64 + { + return get_number(); + } + + case 0xd9: // str 8 + case 0xda: // str 16 + case 0xdb: // str 32 + { + return get_msgpack_string(); + } + + case 0xdc: // array 16 + { + basic_json result = value_t::array; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + result.push_back(parse_msgpack()); + } + return result; + } + + case 0xdd: // array 32 + { + basic_json result = value_t::array; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + result.push_back(parse_msgpack()); + } + return result; + } + + case 0xde: // map 16 + { + basic_json result = value_t::object; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + get(); + auto key = get_msgpack_string(); + result[key] = parse_msgpack(); + } + return result; + } + + case 0xdf: // map 32 + { + basic_json result = value_t::object; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + get(); + auto key = get_msgpack_string(); + result[key] = parse_msgpack(); + } + return result; + } + + // negative fixint + case 0xe0: + case 0xe1: + case 0xe2: + case 0xe3: + case 0xe4: + case 0xe5: + case 0xe6: + case 0xe7: + case 0xe8: + case 0xe9: + case 0xea: + case 0xeb: + case 0xec: + case 0xed: + case 0xee: + case 0xef: + case 0xf0: + case 0xf1: + case 0xf2: + case 0xf3: + case 0xf4: + case 0xf5: + case 0xf6: + case 0xf7: + case 0xf8: + case 0xf9: + case 0xfa: + case 0xfb: + case 0xfc: + case 0xfd: + case 0xfe: + case 0xff: + { + return static_cast(current); + } + + default: // anything else + { + std::stringstream ss; + ss << std::hex << current; + JSON_THROW(parse_error::create(112, chars_read, "error reading MessagePack; last byte: 0x" + ss.str())); + } + } + } + private: int get() { @@ -10378,6 +10304,77 @@ class basic_json } } + std::string get_msgpack_string() + { + check_eof(); + + switch (current) + { + // fixstr + case 0xa0: + case 
0xa1: + case 0xa2: + case 0xa3: + case 0xa4: + case 0xa5: + case 0xa6: + case 0xa7: + case 0xa8: + case 0xa9: + case 0xaa: + case 0xab: + case 0xac: + case 0xad: + case 0xae: + case 0xaf: + case 0xb0: + case 0xb1: + case 0xb2: + case 0xb3: + case 0xb4: + case 0xb5: + case 0xb6: + case 0xb7: + case 0xb8: + case 0xb9: + case 0xba: + case 0xbb: + case 0xbc: + case 0xbd: + case 0xbe: + case 0xbf: + { + const auto len = static_cast(current & 0x1f); + return get_string(len); + } + + case 0xd9: // str 8 + { + const auto len = static_cast(get_number()); + return get_string(len); + } + + case 0xda: // str 16 + { + const auto len = static_cast(get_number()); + return get_string(len); + } + + case 0xdb: // str 32 + { + const auto len = static_cast(get_number()); + return get_string(len); + } + + default: + { + std::stringstream ss; + ss << std::hex << current; + JSON_THROW(parse_error::create(113, chars_read, "expected a MessagePack string; last byte: 0x" + ss.str())); + } + } + } + void check_eof() { if (JSON_UNLIKELY(current == std::char_traits::eof())) @@ -10493,6 +10490,81 @@ class basic_json return br.parse_cbor(); } + + /*! + @brief create a JSON value from a byte vector in MessagePack format + + Deserializes a given byte vector @a v to a JSON value using the MessagePack + serialization format. 
+ + The library maps MessagePack types to JSON value types as follows: + + MessagePack type | JSON value type | first byte + ---------------- | --------------- | ---------- + positive fixint | number_unsigned | 0x00..0x7f + fixmap | object | 0x80..0x8f + fixarray | array | 0x90..0x9f + fixstr | string | 0xa0..0xbf + nil | `null` | 0xc0 + false | `false` | 0xc2 + true | `true` | 0xc3 + float 32 | number_float | 0xca + float 64 | number_float | 0xcb + uint 8 | number_unsigned | 0xcc + uint 16 | number_unsigned | 0xcd + uint 32 | number_unsigned | 0xce + uint 64 | number_unsigned | 0xcf + int 8 | number_integer | 0xd0 + int 16 | number_integer | 0xd1 + int 32 | number_integer | 0xd2 + int 64 | number_integer | 0xd3 + str 8 | string | 0xd9 + str 16 | string | 0xda + str 32 | string | 0xdb + array 16 | array | 0xdc + array 32 | array | 0xdd + map 16 | object | 0xde + map 32 | object | 0xdf + negative fixint | number_integer | 0xe0..0xff + + @warning The mapping is **incomplete** in the sense that not all + MessagePack types can be converted to a JSON value. The following + MessagePack types are not supported and will yield parse errors: + - bin 8 - bin 32 (0xc4..0xc6) + - ext 8 - ext 32 (0xc7..0xc9) + - fixext 1 - fixext 16 (0xd4..0xd8) + + @note Any MessagePack output created by @ref to_msgpack can be successfully + parsed by @ref from_msgpack. + + @param[in] v a byte vector in MessagePack format + @param[in] start_index the index to start reading from @a v (0 by default) + @return deserialized JSON value + + @throw parse_error.110 if the given vector ends prematurely + @throw parse_error.112 if unsupported features from MessagePack were + used in the given vector @a v or if the input is not valid MessagePack + @throw parse_error.113 if a string was expected as map key, but not found + + @complexity Linear in the size of the byte vector @a v.
+ + @liveexample{The example shows the deserialization of a byte vector in + MessagePack format to a JSON value.,from_msgpack} + + @sa http://msgpack.org + @sa @ref to_msgpack(const basic_json&) for the analogous serialization + @sa @ref from_cbor(const std::vector&, const size_t) for the + related CBOR format + + @since version 2.0.9, parameter @a start_index since 2.1.1 + */ + static basic_json from_msgpack(const std::vector& v, + const size_t start_index = 0) + { + binary_reader br(reinterpret_cast(v.data() + start_index), v.size() - start_index); + return br.parse_msgpack(); + } + ////////////////////// // lexer and parser // ////////////////////// diff --git a/test/src/unit-cbor.cpp b/test/src/unit-cbor.cpp index 390a1b52..ae6f9a76 100644 --- a/test/src/unit-cbor.cpp +++ b/test/src/unit-cbor.cpp @@ -28,7 +28,6 @@ SOFTWARE. #include "catch.hpp" -#define private public #include "json.hpp" using nlohmann::json; @@ -1357,12 +1356,6 @@ TEST_CASE("CBOR regressions", "[!throws]") } } } - - SECTION("improve code coverage") - { - // exotic edge case - CHECK_THROWS_AS(json::check_length(0xffffffffffffffffull, 0xfffffffffffffff0ull, 0xff), json::parse_error); - } } TEST_CASE("CBOR roundtrips", "[hide]") diff --git a/test/src/unit-msgpack.cpp b/test/src/unit-msgpack.cpp index 5d157812..3f1d1643 100644 --- a/test/src/unit-msgpack.cpp +++ b/test/src/unit-msgpack.cpp @@ -1038,35 +1038,35 @@ TEST_CASE("MessagePack") CHECK_THROWS_AS(json::from_msgpack(std::vector({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})), json::parse_error); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xcc})), - "[json.exception.parse_error.110] parse error at 2: cannot read 1 bytes from vector"); + "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xcd})), - "[json.exception.parse_error.110] parse error at 2: cannot read 2 bytes from vector"); + "[json.exception.parse_error.110] parse error at 2: unexpected end of 
input"); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xcd, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 2 bytes from vector"); + "[json.exception.parse_error.110] parse error at 3: unexpected end of input"); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xce})), - "[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector"); + "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xce, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector"); + "[json.exception.parse_error.110] parse error at 3: unexpected end of input"); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xce, 0x00, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector"); + "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xce, 0x00, 0x00, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector"); + "[json.exception.parse_error.110] parse error at 5: unexpected end of input"); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xcf})), - "[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector"); + "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xcf, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector"); + "[json.exception.parse_error.110] parse error at 3: unexpected end of input"); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xcf, 0x00, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector"); + "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xcf, 0x00, 0x00, 0x00})), - 
"[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector"); + "[json.exception.parse_error.110] parse error at 5: unexpected end of input"); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xcf, 0x00, 0x00, 0x00, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector"); + "[json.exception.parse_error.110] parse error at 6: unexpected end of input"); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector"); + "[json.exception.parse_error.110] parse error at 7: unexpected end of input"); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector"); + "[json.exception.parse_error.110] parse error at 8: unexpected end of input"); CHECK_THROWS_WITH(json::from_msgpack(std::vector({0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})), - "[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector"); + "[json.exception.parse_error.110] parse error at 9: unexpected end of input"); } SECTION("unsupported bytes") diff --git a/test/src/unit-regression.cpp b/test/src/unit-regression.cpp index fb4f03d1..68ccda46 100644 --- a/test/src/unit-regression.cpp +++ b/test/src/unit-regression.cpp @@ -638,13 +638,13 @@ TEST_CASE("regression tests") std::vector vec1 {0xcb, 0x8f, 0x0a}; CHECK_THROWS_AS(json::from_msgpack(vec1), json::parse_error); CHECK_THROWS_WITH(json::from_msgpack(vec1), - "[json.exception.parse_error.110] parse error at 2: cannot read 8 bytes from vector"); + "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); // related test case: incomplete float32 std::vector vec2 {0xca, 0x8f, 0x0a}; CHECK_THROWS_AS(json::from_msgpack(vec2), json::parse_error); CHECK_THROWS_WITH(json::from_msgpack(vec2), - 
"[json.exception.parse_error.110] parse error at 2: cannot read 4 bytes from vector"); + "[json.exception.parse_error.110] parse error at 4: unexpected end of input"); // related test case: incomplete Half-Precision Float (CBOR) std::vector vec3 {0xf9, 0x8f}; @@ -671,7 +671,7 @@ TEST_CASE("regression tests") std::vector vec1 {0x87}; CHECK_THROWS_AS(json::from_msgpack(vec1), json::parse_error); CHECK_THROWS_WITH(json::from_msgpack(vec1), - "[json.exception.parse_error.110] parse error at 2: cannot read 1 bytes from vector"); + "[json.exception.parse_error.110] parse error at 2: unexpected end of input"); // more test cases for MessagePack for (auto b : @@ -708,7 +708,7 @@ TEST_CASE("regression tests") "[json.exception.parse_error.110] parse error at 1: unexpected end of input"); CHECK_THROWS_AS(json::from_msgpack(vec2), json::parse_error); CHECK_THROWS_WITH(json::from_msgpack(vec2), - "[json.exception.parse_error.110] parse error at 1: cannot read 1 bytes from vector"); + "[json.exception.parse_error.110] parse error at 1: unexpected end of input"); } SECTION("issue #411 - Heap-buffer-overflow (OSS-Fuzz issue 366)") From c28bf823bce93b47afd05655b9eb535e7cfc05ec Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 4 Apr 2017 17:30:43 +0200 Subject: [PATCH 26/44] :hammer: added endianess check --- src/json.hpp | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 6853d8c0..a26c69c5 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -9364,11 +9364,13 @@ class basic_json { public: explicit binary_reader(std::istream& i) - : ia(new cached_input_stream_adapter(i, 16384)) + : ia(new cached_input_stream_adapter(i, 16384)), + is_little_endian(little_endianess()) {} binary_reader(const char* buff, const size_t len) - : ia(new input_buffer_adapter(buff, len)) + : ia(new input_buffer_adapter(buff, len)), + is_little_endian(little_endianess()) {} ~binary_reader() @@ -9555,7 +9557,7 @@ 
class basic_json case 0x97: { basic_json result = value_t::array; - const auto len = static_cast(current - 0x80); + const auto len = static_cast(current & 0x1f); for (size_t i = 0; i < len; ++i) { result.push_back(parse_cbor()); @@ -9644,7 +9646,7 @@ class basic_json case 0xb7: { basic_json result = value_t::object; - const auto len = static_cast(current - 0xa0); + const auto len = static_cast(current & 0x1f); for (size_t i = 0; i < len; ++i) { get(); @@ -9780,7 +9782,7 @@ class basic_json default: // anything else (0xFF is handled inside the other types) { std::stringstream ss; - ss << std::hex << current; + ss << std::setw(2) << std::setfill('0') << std::hex << current; JSON_THROW(parse_error::create(112, chars_read, "error reading CBOR; last byte: 0x" + ss.str())); } } @@ -10182,20 +10184,26 @@ class basic_json default: // anything else { std::stringstream ss; - ss << std::hex << current; + ss << std::setw(2) << std::setfill('0') << std::hex << current; JSON_THROW(parse_error::create(112, chars_read, "error reading MessagePack; last byte: 0x" + ss.str())); } } } private: + // from http://stackoverflow.com/a/1001328/266378 + static bool little_endianess() + { + int num = 1; + return (*reinterpret_cast(&num) == 1); + } + int get() { ++chars_read; return (current = ia->get_character()); } - // todo: check if this breaks with endianess template T get_number() { @@ -10204,7 +10212,16 @@ class basic_json { get(); check_eof(); - vec[sizeof(T) - i - 1] = static_cast(current); + + // reverse byte order prior to conversion if necessary + if (is_little_endian) + { + vec[sizeof(T) - i - 1] = static_cast(current); + } + else + { + vec[i] = static_cast(current); + } } T result; @@ -10256,7 +10273,7 @@ class basic_json case 0x76: case 0x77: { - const auto len = static_cast(current - 0x60); + const auto len = static_cast(current & 0x1f); return get_string(len); } @@ -10298,7 +10315,7 @@ class basic_json default: { std::stringstream ss; - ss << std::hex << current; + ss << 
std::setw(2) << std::setfill('0') << std::hex << current; JSON_THROW(parse_error::create(113, chars_read, "expected a CBOR string; last byte: 0x" + ss.str())); } } @@ -10369,7 +10386,7 @@ class basic_json default: { std::stringstream ss; - ss << std::hex << current; + ss << std::setw(2) << std::setfill('0') << std::hex << current; JSON_THROW(parse_error::create(113, chars_read, "expected a MessagePack string; last byte: 0x" + ss.str())); } } @@ -10392,6 +10409,9 @@ class basic_json /// the number of characters read size_t chars_read = 0; + + /// whether we can assume little endianess + const bool is_little_endian = true; }; public: From 08fdfcca9a994fcf7022da250fa2c7328d3cf94b Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 4 Apr 2017 23:17:43 +0200 Subject: [PATCH 27/44] :hammer: implemented a binary writer --- src/json.hpp | 1443 ++++++++++++++++++++++++-------------------------- 1 file changed, 700 insertions(+), 743 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index a26c69c5..c04408b2 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -7423,746 +7423,6 @@ class basic_json /// @} - ////////////////////////////////////////// - // binary serialization/deserialization // - ////////////////////////////////////////// - - /// @name binary serialization/deserialization support - /// @{ - - private: - /*! - @note Some code in the switch cases has been copied, because otherwise - copilers would complain about implicit fallthrough and there is no - portable attribute to mute such warnings. 
- */ - template - static void add_to_vector(std::vector& vec, size_t bytes, const T number) - { - assert(bytes == 1 or bytes == 2 or bytes == 4 or bytes == 8); - - switch (bytes) - { - case 8: - { - vec.push_back(static_cast((static_cast(number) >> 070) & 0xff)); - vec.push_back(static_cast((static_cast(number) >> 060) & 0xff)); - vec.push_back(static_cast((static_cast(number) >> 050) & 0xff)); - vec.push_back(static_cast((static_cast(number) >> 040) & 0xff)); - vec.push_back(static_cast((number >> 030) & 0xff)); - vec.push_back(static_cast((number >> 020) & 0xff)); - vec.push_back(static_cast((number >> 010) & 0xff)); - vec.push_back(static_cast(number & 0xff)); - break; - } - - case 4: - { - vec.push_back(static_cast((number >> 030) & 0xff)); - vec.push_back(static_cast((number >> 020) & 0xff)); - vec.push_back(static_cast((number >> 010) & 0xff)); - vec.push_back(static_cast(number & 0xff)); - break; - } - - case 2: - { - vec.push_back(static_cast((number >> 010) & 0xff)); - vec.push_back(static_cast(number & 0xff)); - break; - } - - case 1: - { - vec.push_back(static_cast(number & 0xff)); - break; - } - } - } - - /*! - @brief create a MessagePack serialization of a given JSON value - - This is a straightforward implementation of the MessagePack specification. - - @param[in] j JSON value to serialize - @param[in,out] v byte vector to write the serialization to - - @sa https://github.com/msgpack/msgpack/blob/master/spec.md - */ - static void to_msgpack_internal(const basic_json& j, std::vector& v) - { - switch (j.type()) - { - case value_t::null: - { - // nil - v.push_back(0xc0); - break; - } - - case value_t::boolean: - { - // true and false - v.push_back(j.m_value.boolean ? 0xc3 : 0xc2); - break; - } - - case value_t::number_integer: - { - if (j.m_value.number_integer >= 0) - { - // MessagePack does not differentiate between positive - // signed integers and unsigned integers. Therefore, we - // used the code from the value_t::number_unsigned case - // here. 
- if (j.m_value.number_unsigned < 128) - { - // positive fixnum - add_to_vector(v, 1, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 8 - v.push_back(0xcc); - add_to_vector(v, 1, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 16 - v.push_back(0xcd); - add_to_vector(v, 2, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 32 - v.push_back(0xce); - add_to_vector(v, 4, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 64 - v.push_back(0xcf); - add_to_vector(v, 8, j.m_value.number_unsigned); - } - } - else - { - if (j.m_value.number_integer >= -32) - { - // negative fixnum - add_to_vector(v, 1, j.m_value.number_integer); - } - else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) - { - // int 8 - v.push_back(0xd0); - add_to_vector(v, 1, j.m_value.number_integer); - } - else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) - { - // int 16 - v.push_back(0xd1); - add_to_vector(v, 2, j.m_value.number_integer); - } - else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) - { - // int 32 - v.push_back(0xd2); - add_to_vector(v, 4, j.m_value.number_integer); - } - else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) - { - // int 64 - v.push_back(0xd3); - add_to_vector(v, 8, j.m_value.number_integer); - } - } - break; - } - - case value_t::number_unsigned: - { - if (j.m_value.number_unsigned < 128) - { - // positive fixnum - add_to_vector(v, 1, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= 
(std::numeric_limits::max)()) - { - // uint 8 - v.push_back(0xcc); - add_to_vector(v, 1, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 16 - v.push_back(0xcd); - add_to_vector(v, 2, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 32 - v.push_back(0xce); - add_to_vector(v, 4, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 64 - v.push_back(0xcf); - add_to_vector(v, 8, j.m_value.number_unsigned); - } - break; - } - - case value_t::number_float: - { - // float 64 - v.push_back(0xcb); - const auto* helper = reinterpret_cast(&(j.m_value.number_float)); - for (size_t i = 0; i < 8; ++i) - { - v.push_back(helper[7 - i]); - } - break; - } - - case value_t::string: - { - const auto N = j.m_value.string->size(); - if (N <= 31) - { - // fixstr - v.push_back(static_cast(0xa0 | N)); - } - else if (N <= 255) - { - // str 8 - v.push_back(0xd9); - add_to_vector(v, 1, N); - } - else if (N <= 65535) - { - // str 16 - v.push_back(0xda); - add_to_vector(v, 2, N); - } - else if (N <= 4294967295) - { - // str 32 - v.push_back(0xdb); - add_to_vector(v, 4, N); - } - - // append string - std::copy(j.m_value.string->begin(), j.m_value.string->end(), - std::back_inserter(v)); - break; - } - - case value_t::array: - { - const auto N = j.m_value.array->size(); - if (N <= 15) - { - // fixarray - v.push_back(static_cast(0x90 | N)); - } - else if (N <= 0xffff) - { - // array 16 - v.push_back(0xdc); - add_to_vector(v, 2, N); - } - else if (N <= 0xffffffff) - { - // array 32 - v.push_back(0xdd); - add_to_vector(v, 4, N); - } - - // append each element - for (const auto& el : *j.m_value.array) - { - to_msgpack_internal(el, v); - } - break; - } - - case value_t::object: - { - const auto N = j.m_value.object->size(); - if (N <= 15) - { - // fixmap - v.push_back(static_cast(0x80 | (N & 0xf))); - } - 
else if (N <= 65535) - { - // map 16 - v.push_back(0xde); - add_to_vector(v, 2, N); - } - else if (N <= 4294967295) - { - // map 32 - v.push_back(0xdf); - add_to_vector(v, 4, N); - } - - // append each element - for (const auto& el : *j.m_value.object) - { - to_msgpack_internal(el.first, v); - to_msgpack_internal(el.second, v); - } - break; - } - - default: - { - break; - } - } - } - - /*! - @brief create a CBOR serialization of a given JSON value - - This is a straightforward implementation of the CBOR specification. - - @param[in] j JSON value to serialize - @param[in,out] v byte vector to write the serialization to - - @sa https://tools.ietf.org/html/rfc7049 - */ - static void to_cbor_internal(const basic_json& j, std::vector& v) - { - switch (j.type()) - { - case value_t::null: - { - v.push_back(0xf6); - break; - } - - case value_t::boolean: - { - v.push_back(j.m_value.boolean ? 0xf5 : 0xf4); - break; - } - - case value_t::number_integer: - { - if (j.m_value.number_integer >= 0) - { - // CBOR does not differentiate between positive signed - // integers and unsigned integers. Therefore, we used the - // code from the value_t::number_unsigned case here. 
- if (j.m_value.number_integer <= 0x17) - { - add_to_vector(v, 1, j.m_value.number_integer); - } - else if (j.m_value.number_integer <= (std::numeric_limits::max)()) - { - v.push_back(0x18); - // one-byte uint8_t - add_to_vector(v, 1, j.m_value.number_integer); - } - else if (j.m_value.number_integer <= (std::numeric_limits::max)()) - { - v.push_back(0x19); - // two-byte uint16_t - add_to_vector(v, 2, j.m_value.number_integer); - } - else if (j.m_value.number_integer <= (std::numeric_limits::max)()) - { - v.push_back(0x1a); - // four-byte uint32_t - add_to_vector(v, 4, j.m_value.number_integer); - } - else - { - v.push_back(0x1b); - // eight-byte uint64_t - add_to_vector(v, 8, j.m_value.number_integer); - } - } - else - { - // The conversions below encode the sign in the first - // byte, and the value is converted to a positive number. - const auto positive_number = -1 - j.m_value.number_integer; - if (j.m_value.number_integer >= -24) - { - v.push_back(static_cast(0x20 + positive_number)); - } - else if (positive_number <= (std::numeric_limits::max)()) - { - // int 8 - v.push_back(0x38); - add_to_vector(v, 1, positive_number); - } - else if (positive_number <= (std::numeric_limits::max)()) - { - // int 16 - v.push_back(0x39); - add_to_vector(v, 2, positive_number); - } - else if (positive_number <= (std::numeric_limits::max)()) - { - // int 32 - v.push_back(0x3a); - add_to_vector(v, 4, positive_number); - } - else - { - // int 64 - v.push_back(0x3b); - add_to_vector(v, 8, positive_number); - } - } - break; - } - - case value_t::number_unsigned: - { - if (j.m_value.number_unsigned <= 0x17) - { - v.push_back(static_cast(j.m_value.number_unsigned)); - } - else if (j.m_value.number_unsigned <= 0xff) - { - v.push_back(0x18); - // one-byte uint8_t - add_to_vector(v, 1, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= 0xffff) - { - v.push_back(0x19); - // two-byte uint16_t - add_to_vector(v, 2, j.m_value.number_unsigned); - } - else if 
(j.m_value.number_unsigned <= 0xffffffff) - { - v.push_back(0x1a); - // four-byte uint32_t - add_to_vector(v, 4, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= 0xffffffffffffffff) - { - v.push_back(0x1b); - // eight-byte uint64_t - add_to_vector(v, 8, j.m_value.number_unsigned); - } - break; - } - - case value_t::number_float: - { - // Double-Precision Float - v.push_back(0xfb); - const auto* helper = reinterpret_cast(&(j.m_value.number_float)); - for (size_t i = 0; i < 8; ++i) - { - v.push_back(helper[7 - i]); - } - break; - } - - case value_t::string: - { - const auto N = j.m_value.string->size(); - if (N <= 0x17) - { - v.push_back(static_cast(0x60 + N)); // 1 byte for string + size - } - else if (N <= 0xff) - { - v.push_back(0x78); // one-byte uint8_t for N - add_to_vector(v, 1, N); - } - else if (N <= 0xffff) - { - v.push_back(0x79); // two-byte uint16_t for N - add_to_vector(v, 2, N); - } - else if (N <= 0xffffffff) - { - v.push_back(0x7a); // four-byte uint32_t for N - add_to_vector(v, 4, N); - } - // LCOV_EXCL_START - else if (N <= 0xffffffffffffffff) - { - v.push_back(0x7b); // eight-byte uint64_t for N - add_to_vector(v, 8, N); - } - // LCOV_EXCL_STOP - - // append string - std::copy(j.m_value.string->begin(), j.m_value.string->end(), - std::back_inserter(v)); - break; - } - - case value_t::array: - { - const auto N = j.m_value.array->size(); - if (N <= 0x17) - { - v.push_back(static_cast(0x80 + N)); // 1 byte for array + size - } - else if (N <= 0xff) - { - v.push_back(0x98); // one-byte uint8_t for N - add_to_vector(v, 1, N); - } - else if (N <= 0xffff) - { - v.push_back(0x99); // two-byte uint16_t for N - add_to_vector(v, 2, N); - } - else if (N <= 0xffffffff) - { - v.push_back(0x9a); // four-byte uint32_t for N - add_to_vector(v, 4, N); - } - // LCOV_EXCL_START - else if (N <= 0xffffffffffffffff) - { - v.push_back(0x9b); // eight-byte uint64_t for N - add_to_vector(v, 8, N); - } - // LCOV_EXCL_STOP - - // append each element - 
for (const auto& el : *j.m_value.array) - { - to_cbor_internal(el, v); - } - break; - } - - case value_t::object: - { - const auto N = j.m_value.object->size(); - if (N <= 0x17) - { - v.push_back(static_cast(0xa0 + N)); // 1 byte for object + size - } - else if (N <= 0xff) - { - v.push_back(0xb8); - add_to_vector(v, 1, N); // one-byte uint8_t for N - } - else if (N <= 0xffff) - { - v.push_back(0xb9); - add_to_vector(v, 2, N); // two-byte uint16_t for N - } - else if (N <= 0xffffffff) - { - v.push_back(0xba); - add_to_vector(v, 4, N); // four-byte uint32_t for N - } - // LCOV_EXCL_START - else if (N <= 0xffffffffffffffff) - { - v.push_back(0xbb); - add_to_vector(v, 8, N); // eight-byte uint64_t for N - } - // LCOV_EXCL_STOP - - // append each element - for (const auto& el : *j.m_value.object) - { - to_cbor_internal(el.first, v); - to_cbor_internal(el.second, v); - } - break; - } - - default: - { - break; - } - } - } - - public: - /*! - @brief create a MessagePack serialization of a given JSON value - - Serializes a given JSON value @a j to a byte vector using the MessagePack - serialization format. MessagePack is a binary serialization format which - aims to be more compact than JSON itself, yet more efficient to parse. 
- - The library uses the following mapping from JSON values types to - MessagePack types according to the MessagePack specification: - - JSON value type | value/range | MessagePack type | first byte - --------------- | --------------------------------- | ---------------- | ---------- - null | `null` | nil | 0xc0 - boolean | `true` | true | 0xc3 - boolean | `false` | false | 0xc2 - number_integer | -9223372036854775808..-2147483649 | int64 | 0xd3 - number_integer | -2147483648..-32769 | int32 | 0xd2 - number_integer | -32768..-129 | int16 | 0xd1 - number_integer | -128..-33 | int8 | 0xd0 - number_integer | -32..-1 | negative fixint | 0xe0..0xff - number_integer | 0..127 | positive fixint | 0x00..0x7f - number_integer | 128..255 | uint 8 | 0xcc - number_integer | 256..65535 | uint 16 | 0xcd - number_integer | 65536..4294967295 | uint 32 | 0xce - number_integer | 4294967296..18446744073709551615 | uint 64 | 0xcf - number_unsigned | 0..127 | positive fixint | 0x00..0x7f - number_unsigned | 128..255 | uint 8 | 0xcc - number_unsigned | 256..65535 | uint 16 | 0xcd - number_unsigned | 65536..4294967295 | uint 32 | 0xce - number_unsigned | 4294967296..18446744073709551615 | uint 64 | 0xcf - number_float | *any value* | float 64 | 0xcb - string | *length*: 0..31 | fixstr | 0xa0..0xbf - string | *length*: 32..255 | str 8 | 0xd9 - string | *length*: 256..65535 | str 16 | 0xda - string | *length*: 65536..4294967295 | str 32 | 0xdb - array | *size*: 0..15 | fixarray | 0x90..0x9f - array | *size*: 16..65535 | array 16 | 0xdc - array | *size*: 65536..4294967295 | array 32 | 0xdd - object | *size*: 0..15 | fix map | 0x80..0x8f - object | *size*: 16..65535 | map 16 | 0xde - object | *size*: 65536..4294967295 | map 32 | 0xdf - - @note The mapping is **complete** in the sense that any JSON value type - can be converted to a MessagePack value. 
- - @note The following values can **not** be converted to a MessagePack value: - - strings with more than 4294967295 bytes - - arrays with more than 4294967295 elements - - objects with more than 4294967295 elements - - @note The following MessagePack types are not used in the conversion: - - bin 8 - bin 32 (0xc4..0xc6) - - ext 8 - ext 32 (0xc7..0xc9) - - float 32 (0xca) - - fixext 1 - fixext 16 (0xd4..0xd8) - - @note Any MessagePack output created @ref to_msgpack can be successfully - parsed by @ref from_msgpack. - - @param[in] j JSON value to serialize - @return MessagePack serialization as byte vector - - @complexity Linear in the size of the JSON value @a j. - - @liveexample{The example shows the serialization of a JSON value to a byte - vector in MessagePack format.,to_msgpack} - - @sa http://msgpack.org - @sa @ref from_msgpack(const std::vector&, const size_t) for the - analogous deserialization - @sa @ref to_cbor(const basic_json& for the related CBOR format - - @since version 2.0.9 - */ - static std::vector to_msgpack(const basic_json& j) - { - std::vector result; - to_msgpack_internal(j, result); - return result; - } - - /*! - @brief create a CBOR serialization of a given JSON value - - Serializes a given JSON value @a j to a byte vector using the CBOR (Concise - Binary Object Representation) serialization format. CBOR is a binary - serialization format which aims to be more compact than JSON itself, yet - more efficient to parse. 
- - The library uses the following mapping from JSON values types to - CBOR types according to the CBOR specification (RFC 7049): - - JSON value type | value/range | CBOR type | first byte - --------------- | ------------------------------------------ | ---------------------------------- | --------------- - null | `null` | Null | 0xf6 - boolean | `true` | True | 0xf5 - boolean | `false` | False | 0xf4 - number_integer | -9223372036854775808..-2147483649 | Negative integer (8 bytes follow) | 0x3b - number_integer | -2147483648..-32769 | Negative integer (4 bytes follow) | 0x3a - number_integer | -32768..-129 | Negative integer (2 bytes follow) | 0x39 - number_integer | -128..-25 | Negative integer (1 byte follow) | 0x38 - number_integer | -24..-1 | Negative integer | 0x20..0x37 - number_integer | 0..23 | Integer | 0x00..0x17 - number_integer | 24..255 | Unsigned integer (1 byte follow) | 0x18 - number_integer | 256..65535 | Unsigned integer (2 bytes follow) | 0x19 - number_integer | 65536..4294967295 | Unsigned integer (4 bytes follow) | 0x1a - number_integer | 4294967296..18446744073709551615 | Unsigned integer (8 bytes follow) | 0x1b - number_unsigned | 0..23 | Integer | 0x00..0x17 - number_unsigned | 24..255 | Unsigned integer (1 byte follow) | 0x18 - number_unsigned | 256..65535 | Unsigned integer (2 bytes follow) | 0x19 - number_unsigned | 65536..4294967295 | Unsigned integer (4 bytes follow) | 0x1a - number_unsigned | 4294967296..18446744073709551615 | Unsigned integer (8 bytes follow) | 0x1b - number_float | *any value* | Double-Precision Float | 0xfb - string | *length*: 0..23 | UTF-8 string | 0x60..0x77 - string | *length*: 23..255 | UTF-8 string (1 byte follow) | 0x78 - string | *length*: 256..65535 | UTF-8 string (2 bytes follow) | 0x79 - string | *length*: 65536..4294967295 | UTF-8 string (4 bytes follow) | 0x7a - string | *length*: 4294967296..18446744073709551615 | UTF-8 string (8 bytes follow) | 0x7b - array | *size*: 0..23 | array | 0x80..0x97 - 
array | *size*: 23..255 | array (1 byte follow) | 0x98 - array | *size*: 256..65535 | array (2 bytes follow) | 0x99 - array | *size*: 65536..4294967295 | array (4 bytes follow) | 0x9a - array | *size*: 4294967296..18446744073709551615 | array (8 bytes follow) | 0x9b - object | *size*: 0..23 | map | 0xa0..0xb7 - object | *size*: 23..255 | map (1 byte follow) | 0xb8 - object | *size*: 256..65535 | map (2 bytes follow) | 0xb9 - object | *size*: 65536..4294967295 | map (4 bytes follow) | 0xba - object | *size*: 4294967296..18446744073709551615 | map (8 bytes follow) | 0xbb - - @note The mapping is **complete** in the sense that any JSON value type - can be converted to a CBOR value. - - @note The following CBOR types are not used in the conversion: - - byte strings (0x40..0x5f) - - UTF-8 strings terminated by "break" (0x7f) - - arrays terminated by "break" (0x9f) - - maps terminated by "break" (0xbf) - - date/time (0xc0..0xc1) - - bignum (0xc2..0xc3) - - decimal fraction (0xc4) - - bigfloat (0xc5) - - tagged items (0xc6..0xd4, 0xd8..0xdb) - - expected conversions (0xd5..0xd7) - - simple values (0xe0..0xf3, 0xf8) - - undefined (0xf7) - - half and single-precision floats (0xf9-0xfa) - - break (0xff) - - @param[in] j JSON value to serialize - @return MessagePack serialization as byte vector - - @complexity Linear in the size of the JSON value @a j. 
- - @liveexample{The example shows the serialization of a JSON value to a byte - vector in CBOR format.,to_cbor} - - @sa http://cbor.io - @sa @ref from_cbor(const std::vector&, const size_t) for the - analogous deserialization - @sa @ref to_msgpack(const basic_json& for the related MessagePack format - - @since version 2.0.9 - */ - static std::vector to_cbor(const basic_json& j) - { - std::vector result; - to_cbor_internal(j, result); - return result; - } - - /// @} - /////////////////////////// // convenience functions // /////////////////////////// @@ -9355,9 +8615,12 @@ class basic_json const char* start; }; - //////////////////// - // binary formats // - //////////////////// + ////////////////////////////////////////// + // binary serialization/deserialization // + ////////////////////////////////////////// + + /// @name binary serialization/deserialization support + /// @{ private: class binary_reader @@ -10414,7 +9677,699 @@ class basic_json const bool is_little_endian = true; }; + class binary_writer + { + public: + binary_writer() + : is_little_endian(little_endianess()) + {} + + std::vector write_cbor(const basic_json& j) + { + write_cbor_internal(j); + return v; + } + + std::vector write_msgpack(const basic_json& j) + { + write_msgpack_internal(j); + return v; + } + + private: + void write_cbor_internal(const basic_json& j) + { + switch (j.type()) + { + case value_t::null: + { + v.push_back(0xf6); + break; + } + + case value_t::boolean: + { + v.push_back(j.m_value.boolean ? 0xf5 : 0xf4); + break; + } + + case value_t::number_integer: + { + if (j.m_value.number_integer >= 0) + { + // CBOR does not differentiate between positive signed + // integers and unsigned integers. Therefore, we used the + // code from the value_t::number_unsigned case here. 
+ if (j.m_value.number_integer <= 0x17) + { + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer <= (std::numeric_limits::max)()) + { + v.push_back(0x18); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer <= (std::numeric_limits::max)()) + { + v.push_back(0x19); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer <= (std::numeric_limits::max)()) + { + v.push_back(0x1a); + write_number(static_cast(j.m_value.number_integer)); + } + else + { + v.push_back(0x1b); + write_number(static_cast(j.m_value.number_integer)); + } + } + else + { + // The conversions below encode the sign in the first + // byte, and the value is converted to a positive number. + const auto positive_number = -1 - j.m_value.number_integer; + if (j.m_value.number_integer >= -24) + { + write_number(static_cast(0x20 + positive_number)); + } + else if (positive_number <= (std::numeric_limits::max)()) + { + v.push_back(0x38); + write_number(static_cast(positive_number)); + } + else if (positive_number <= (std::numeric_limits::max)()) + { + v.push_back(0x39); + write_number(static_cast(positive_number)); + } + else if (positive_number <= (std::numeric_limits::max)()) + { + v.push_back(0x3a); + write_number(static_cast(positive_number)); + } + else + { + v.push_back(0x3b); + write_number(static_cast(positive_number)); + } + } + break; + } + + case value_t::number_unsigned: + { + if (j.m_value.number_unsigned <= 0x17) + { + write_number(static_cast(j.m_value.number_unsigned)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + v.push_back(0x18); + write_number(static_cast(j.m_value.number_unsigned)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + v.push_back(0x19); + write_number(static_cast(j.m_value.number_unsigned)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + v.push_back(0x1a); 
+ write_number(static_cast(j.m_value.number_unsigned)); + } + else + { + v.push_back(0x1b); + write_number(static_cast(j.m_value.number_unsigned)); + } + break; + } + + case value_t::number_float: + { + // Double-Precision Float + v.push_back(0xfb); + write_number(j.m_value.number_float); + break; + } + + case value_t::string: + { + const auto N = j.m_value.string->size(); + if (N <= 0x17) + { + write_number(static_cast(0x60 + N)); + } + else if (N <= 0xff) + { + v.push_back(0x78); + write_number(static_cast(N)); + } + else if (N <= 0xffff) + { + v.push_back(0x79); + write_number(static_cast(N)); + } + else if (N <= 0xffffffff) + { + v.push_back(0x7a); + write_number(static_cast(N)); + } + // LCOV_EXCL_START + else if (N <= 0xffffffffffffffff) + { + v.push_back(0x7b); + write_number(static_cast(N)); + } + // LCOV_EXCL_STOP + + // append string + std::copy(j.m_value.string->begin(), j.m_value.string->end(), + std::back_inserter(v)); + break; + } + + case value_t::array: + { + const auto N = j.m_value.array->size(); + if (N <= 0x17) + { + write_number(static_cast(0x80 + N)); + } + else if (N <= 0xff) + { + v.push_back(0x98); + write_number(static_cast(N)); + } + else if (N <= 0xffff) + { + v.push_back(0x99); + write_number(static_cast(N)); + } + else if (N <= 0xffffffff) + { + v.push_back(0x9a); + write_number(static_cast(N)); + } + // LCOV_EXCL_START + else if (N <= 0xffffffffffffffff) + { + v.push_back(0x9b); + write_number(static_cast(N)); + } + // LCOV_EXCL_STOP + + // append each element + for (const auto& el : *j.m_value.array) + { + write_cbor_internal(el); + } + break; + } + + case value_t::object: + { + const auto N = j.m_value.object->size(); + if (N <= 0x17) + { + write_number(static_cast(0xa0 + N)); + } + else if (N <= 0xff) + { + v.push_back(0xb8); + write_number(static_cast(N)); + } + else if (N <= 0xffff) + { + v.push_back(0xb9); + write_number(static_cast(N)); + } + else if (N <= 0xffffffff) + { + v.push_back(0xba); + write_number(static_cast(N)); + } 
+ // LCOV_EXCL_START + else if (N <= 0xffffffffffffffff) + { + v.push_back(0xbb); + write_number(static_cast(N)); + } + // LCOV_EXCL_STOP + + // append each element + for (const auto& el : *j.m_value.object) + { + write_cbor_internal(el.first); + write_cbor_internal(el.second); + } + break; + } + + default: + { + break; + } + } + } + + void write_msgpack_internal(const basic_json& j) + { + switch (j.type()) + { + case value_t::null: + { + // nil + v.push_back(0xc0); + break; + } + + case value_t::boolean: + { + // true and false + v.push_back(j.m_value.boolean ? 0xc3 : 0xc2); + break; + } + + case value_t::number_integer: + { + if (j.m_value.number_integer >= 0) + { + // MessagePack does not differentiate between positive + // signed integers and unsigned integers. Therefore, we + // used the code from the value_t::number_unsigned case + // here. + if (j.m_value.number_unsigned < 128) + { + // positive fixnum + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 8 + v.push_back(0xcc); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 16 + v.push_back(0xcd); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 32 + v.push_back(0xce); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 64 + v.push_back(0xcf); + write_number(static_cast(j.m_value.number_integer)); + } + } + else + { + if (j.m_value.number_integer >= -32) + { + // negative fixnum + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) + { + // int 8 + v.push_back(0xd0); + 
write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) + { + // int 16 + v.push_back(0xd1); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) + { + // int 32 + v.push_back(0xd2); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) + { + // int 64 + v.push_back(0xd3); + write_number(static_cast(j.m_value.number_integer)); + } + } + break; + } + + case value_t::number_unsigned: + { + if (j.m_value.number_unsigned < 128) + { + // positive fixnum + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 8 + v.push_back(0xcc); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 16 + v.push_back(0xcd); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 32 + v.push_back(0xce); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 64 + v.push_back(0xcf); + write_number(static_cast(j.m_value.number_integer)); + } + break; + } + + case value_t::number_float: + { + // float 64 + v.push_back(0xcb); + write_number(j.m_value.number_float); + break; + } + + case value_t::string: + { + const auto N = j.m_value.string->size(); + if (N <= 31) + { + // fixstr + write_number(static_cast(0xa0 | N)); + } + else if (N <= 255) + { + // str 8 + v.push_back(0xd9); + write_number(static_cast(N)); + } + else if (N <= 65535) + { + // str 16 + 
v.push_back(0xda); + write_number(static_cast(N)); + } + else if (N <= 4294967295) + { + // str 32 + v.push_back(0xdb); + write_number(static_cast(N)); + } + + // append string + std::copy(j.m_value.string->begin(), j.m_value.string->end(), + std::back_inserter(v)); + break; + } + + case value_t::array: + { + const auto N = j.m_value.array->size(); + if (N <= 15) + { + // fixarray + write_number(static_cast(0x90 | N)); + } + else if (N <= 0xffff) + { + // array 16 + v.push_back(0xdc); + write_number(static_cast(N)); + } + else if (N <= 0xffffffff) + { + // array 32 + v.push_back(0xdd); + write_number(static_cast(N)); + } + + // append each element + for (const auto& el : *j.m_value.array) + { + write_msgpack_internal(el); + } + break; + } + + case value_t::object: + { + const auto N = j.m_value.object->size(); + if (N <= 15) + { + // fixmap + write_number(static_cast(0x80 | (N & 0xf))); + } + else if (N <= 65535) + { + // map 16 + v.push_back(0xde); + write_number(static_cast(N)); + } + else if (N <= 4294967295) + { + // map 32 + v.push_back(0xdf); + write_number(static_cast(N)); + } + + // append each element + for (const auto& el : *j.m_value.object) + { + write_msgpack_internal(el.first); + write_msgpack_internal(el.second); + } + break; + } + + default: + { + break; + } + } + } + + template + void write_number(T n) + { + std::array vec; + std::memcpy(vec.data(), &n, sizeof(T)); + + for (size_t i = 0; i < sizeof(T); ++i) + { + // reverse byte order prior to conversion if necessary + if (is_little_endian) + { + v.push_back(vec[sizeof(T) - i - 1]); + } + else + { + v.push_back(vec[i]); + } + } + } + + // from http://stackoverflow.com/a/1001328/266378 + static bool little_endianess() + { + int num = 1; + return (*reinterpret_cast(&num) == 1); + } + + private: + /// whether we can assume little endianess + const bool is_little_endian = true; + + /// the vector that is used as output + std::vector v; + }; + public: + /*! 
+ @brief create a CBOR serialization of a given JSON value + + Serializes a given JSON value @a j to a byte vector using the CBOR (Concise + Binary Object Representation) serialization format. CBOR is a binary + serialization format which aims to be more compact than JSON itself, yet + more efficient to parse. + + The library uses the following mapping from JSON values types to + CBOR types according to the CBOR specification (RFC 7049): + + JSON value type | value/range | CBOR type | first byte + --------------- | ------------------------------------------ | ---------------------------------- | --------------- + null | `null` | Null | 0xf6 + boolean | `true` | True | 0xf5 + boolean | `false` | False | 0xf4 + number_integer | -9223372036854775808..-2147483649 | Negative integer (8 bytes follow) | 0x3b + number_integer | -2147483648..-32769 | Negative integer (4 bytes follow) | 0x3a + number_integer | -32768..-129 | Negative integer (2 bytes follow) | 0x39 + number_integer | -128..-25 | Negative integer (1 byte follow) | 0x38 + number_integer | -24..-1 | Negative integer | 0x20..0x37 + number_integer | 0..23 | Integer | 0x00..0x17 + number_integer | 24..255 | Unsigned integer (1 byte follow) | 0x18 + number_integer | 256..65535 | Unsigned integer (2 bytes follow) | 0x19 + number_integer | 65536..4294967295 | Unsigned integer (4 bytes follow) | 0x1a + number_integer | 4294967296..18446744073709551615 | Unsigned integer (8 bytes follow) | 0x1b + number_unsigned | 0..23 | Integer | 0x00..0x17 + number_unsigned | 24..255 | Unsigned integer (1 byte follow) | 0x18 + number_unsigned | 256..65535 | Unsigned integer (2 bytes follow) | 0x19 + number_unsigned | 65536..4294967295 | Unsigned integer (4 bytes follow) | 0x1a + number_unsigned | 4294967296..18446744073709551615 | Unsigned integer (8 bytes follow) | 0x1b + number_float | *any value* | Double-Precision Float | 0xfb + string | *length*: 0..23 | UTF-8 string | 0x60..0x77 + string | *length*: 23..255 | UTF-8 string (1 
byte follow) | 0x78 + string | *length*: 256..65535 | UTF-8 string (2 bytes follow) | 0x79 + string | *length*: 65536..4294967295 | UTF-8 string (4 bytes follow) | 0x7a + string | *length*: 4294967296..18446744073709551615 | UTF-8 string (8 bytes follow) | 0x7b + array | *size*: 0..23 | array | 0x80..0x97 + array | *size*: 24..255 | array (1 byte follow) | 0x98 + array | *size*: 256..65535 | array (2 bytes follow) | 0x99 + array | *size*: 65536..4294967295 | array (4 bytes follow) | 0x9a + array | *size*: 4294967296..18446744073709551615 | array (8 bytes follow) | 0x9b + object | *size*: 0..23 | map | 0xa0..0xb7 + object | *size*: 24..255 | map (1 byte follow) | 0xb8 + object | *size*: 256..65535 | map (2 bytes follow) | 0xb9 + object | *size*: 65536..4294967295 | map (4 bytes follow) | 0xba + object | *size*: 4294967296..18446744073709551615 | map (8 bytes follow) | 0xbb + + @note The mapping is **complete** in the sense that any JSON value type + can be converted to a CBOR value. + + @note The following CBOR types are not used in the conversion: + - byte strings (0x40..0x5f) + - UTF-8 strings terminated by "break" (0x7f) + - arrays terminated by "break" (0x9f) + - maps terminated by "break" (0xbf) + - date/time (0xc0..0xc1) + - bignum (0xc2..0xc3) + - decimal fraction (0xc4) + - bigfloat (0xc5) + - tagged items (0xc6..0xd4, 0xd8..0xdb) + - expected conversions (0xd5..0xd7) + - simple values (0xe0..0xf3, 0xf8) + - undefined (0xf7) + - half and single-precision floats (0xf9-0xfa) + - break (0xff) + + @param[in] j JSON value to serialize + @return CBOR serialization as byte vector + + @complexity Linear in the size of the JSON value @a j.
+ + @liveexample{The example shows the serialization of a JSON value to a byte + vector in CBOR format.,to_cbor} + + @sa http://cbor.io + @sa @ref from_cbor(const std::vector&, const size_t) for the + analogous deserialization + @sa @ref to_msgpack(const basic_json&) for the related MessagePack format + + @since version 2.0.9 + */ + static std::vector to_cbor(const basic_json& j) + { + binary_writer bw; + return bw.write_cbor(j); + } + + /*! + @brief create a MessagePack serialization of a given JSON value + + Serializes a given JSON value @a j to a byte vector using the MessagePack + serialization format. MessagePack is a binary serialization format which + aims to be more compact than JSON itself, yet more efficient to parse. + + The library uses the following mapping from JSON value types to + MessagePack types according to the MessagePack specification: + + JSON value type | value/range | MessagePack type | first byte + --------------- | --------------------------------- | ---------------- | ---------- + null | `null` | nil | 0xc0 + boolean | `true` | true | 0xc3 + boolean | `false` | false | 0xc2 + number_integer | -9223372036854775808..-2147483649 | int64 | 0xd3 + number_integer | -2147483648..-32769 | int32 | 0xd2 + number_integer | -32768..-129 | int16 | 0xd1 + number_integer | -128..-33 | int8 | 0xd0 + number_integer | -32..-1 | negative fixint | 0xe0..0xff + number_integer | 0..127 | positive fixint | 0x00..0x7f + number_integer | 128..255 | uint 8 | 0xcc + number_integer | 256..65535 | uint 16 | 0xcd + number_integer | 65536..4294967295 | uint 32 | 0xce + number_integer | 4294967296..18446744073709551615 | uint 64 | 0xcf + number_unsigned | 0..127 | positive fixint | 0x00..0x7f + number_unsigned | 128..255 | uint 8 | 0xcc + number_unsigned | 256..65535 | uint 16 | 0xcd + number_unsigned | 65536..4294967295 | uint 32 | 0xce + number_unsigned | 4294967296..18446744073709551615 | uint 64 | 0xcf + number_float | *any value* | float 64 | 0xcb + string |
*length*: 0..31 | fixstr | 0xa0..0xbf + string | *length*: 32..255 | str 8 | 0xd9 + string | *length*: 256..65535 | str 16 | 0xda + string | *length*: 65536..4294967295 | str 32 | 0xdb + array | *size*: 0..15 | fixarray | 0x90..0x9f + array | *size*: 16..65535 | array 16 | 0xdc + array | *size*: 65536..4294967295 | array 32 | 0xdd + object | *size*: 0..15 | fixmap | 0x80..0x8f + object | *size*: 16..65535 | map 16 | 0xde + object | *size*: 65536..4294967295 | map 32 | 0xdf + + @note The mapping is **complete** in the sense that any JSON value type + can be converted to a MessagePack value. + + @note The following values can **not** be converted to a MessagePack value: + - strings with more than 4294967295 bytes + - arrays with more than 4294967295 elements + - objects with more than 4294967295 elements + + @note The following MessagePack types are not used in the conversion: + - bin 8 - bin 32 (0xc4..0xc6) + - ext 8 - ext 32 (0xc7..0xc9) + - float 32 (0xca) + - fixext 1 - fixext 16 (0xd4..0xd8) + + @note Any MessagePack output created by @ref to_msgpack can be successfully + parsed by @ref from_msgpack. + + @param[in] j JSON value to serialize + @return MessagePack serialization as byte vector + + @complexity Linear in the size of the JSON value @a j. + + @liveexample{The example shows the serialization of a JSON value to a byte + vector in MessagePack format.,to_msgpack} + + @sa http://msgpack.org + @sa @ref from_msgpack(const std::vector&, const size_t) for the + analogous deserialization + @sa @ref to_cbor(const basic_json&) for the related CBOR format + + @since version 2.0.9 + */ + static std::vector to_msgpack(const basic_json& j) + { + binary_writer bw; + return bw.write_msgpack(j); + } /*!
@brief create a JSON value from a byte vector in CBOR format @@ -10585,6 +10540,8 @@ class basic_json return br.parse_msgpack(); } + /// @} + ////////////////////// // lexer and parser // ////////////////////// From b992acc2e78578b2106a427d23013cba75841997 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 5 Apr 2017 20:39:27 +0200 Subject: [PATCH 28/44] :hammer: fixed a compiler warning --- src/json.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/json.hpp b/src/json.hpp index c04408b2..ff3fe826 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -10199,7 +10199,7 @@ class basic_json const bool is_little_endian = true; /// the vector that is used as output - std::vector v; + std::vector v {}; }; public: From ff72f3886321eef8e61f9e1b64ccb19032e18549 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Thu, 6 Apr 2017 19:54:08 +0200 Subject: [PATCH 29/44] :hammer: fixed another warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not store eof() in a char buffer… --- src/json.hpp | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index ff3fe826..97addceb 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -8485,8 +8485,7 @@ class basic_json { public: cached_input_stream_adapter(std::istream& i, const size_t buffer_size) - : is(i), start_position(is.tellg()), - buffer(buffer_size, std::char_traits::eof()) + : is(i), start_position(is.tellg()), buffer(buffer_size, '\0') { // immediately abort if stream is erroneous if (JSON_UNLIKELY(i.fail())) @@ -8494,12 +8493,13 @@ class basic_json JSON_THROW(parse_error::create(111, 0, "bad input stream")); } - // initial fill; unfilled buffer characters remain EOF + // initial fill is.read(buffer.data(), static_cast(buffer.size())); + // store number of bytes in the buffer + fill_size = static_cast(is.gcount()); // skip byte-order mark - assert(buffer.size() >= 3); - if (buffer[0] 
== '\xEF' and buffer[1] == '\xBB' and buffer[2] == '\xBF') + if (fill_size >= 3 and buffer[0] == '\xEF' and buffer[1] == '\xBB' and buffer[2] == '\xBF') { buffer_pos += 3; processed_chars += 3; @@ -8516,22 +8516,28 @@ class basic_json int get_character() override { - // check if refilling is necessary - if (JSON_UNLIKELY(buffer_pos == buffer.size())) + // check if refilling is necessary and possible + if (buffer_pos == fill_size and not eof) { // refill - is.read(reinterpret_cast(buffer.data()), static_cast(buffer.size())); - // set unfilled characters to EOF - std::fill_n(buffer.begin() + static_cast(is.gcount()), - buffer.size() - static_cast(is.gcount()), - std::char_traits::eof()); + is.read(buffer.data(), static_cast(buffer.size())); + // store number of bytes in the buffer + fill_size = static_cast(is.gcount()); + + // remember that filling did not yield new input + if (fill_size == 0) + { + eof = true; + } + // the buffer is ready buffer_pos = 0; } ++processed_chars; - const int res = buffer[buffer_pos++]; - return (res == std::char_traits::eof()) ? res : res & 0xFF; + return eof + ? std::char_traits::eof() + : buffer[buffer_pos++] & 0xFF; } std::string read(size_t offset, size_t length) override @@ -8568,6 +8574,11 @@ class basic_json /// chars processed in the current buffer size_t buffer_pos = 0; + /// whether stream reached eof + bool eof = false; + /// how many chars have been copied to the buffer by last (re)fill + size_t fill_size = 0; + /// position of the stream when we started const std::streampos start_position; From 6f99d5b2e978ee6ef448fe39a426ba12c101f8f9 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sat, 8 Apr 2017 23:39:17 +0200 Subject: [PATCH 30/44] :hammer: fixed test case One test case for CBOR and MessagePack assumed little endianess. 
--- test/src/unit-cbor.cpp | 7 +------ test/src/unit-msgpack.cpp | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/test/src/unit-cbor.cpp b/test/src/unit-cbor.cpp index ae6f9a76..debb3732 100644 --- a/test/src/unit-cbor.cpp +++ b/test/src/unit-cbor.cpp @@ -727,14 +727,9 @@ TEST_CASE("CBOR") const auto result = json::to_cbor(j); CHECK(result == expected); - // restore value (reverse array for endianess) - double restored; - std::reverse(expected.begin(), expected.end()); - memcpy(&restored, expected.data(), sizeof(double)); - CHECK(restored == v); - // roundtrip CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result) == v); } } diff --git a/test/src/unit-msgpack.cpp b/test/src/unit-msgpack.cpp index 3f1d1643..a580913d 100644 --- a/test/src/unit-msgpack.cpp +++ b/test/src/unit-msgpack.cpp @@ -676,14 +676,9 @@ TEST_CASE("MessagePack") const auto result = json::to_msgpack(j); CHECK(result == expected); - // restore value (reverse array for endianess) - double restored; - std::reverse(expected.begin(), expected.end()); - memcpy(&restored, expected.data(), sizeof(double)); - CHECK(restored == v); - // roundtrip CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result) == v); } } } From 186a9fd44d529648ad83e318e15729cadcfccd0b Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 9 Apr 2017 19:28:15 +0200 Subject: [PATCH 31/44] :hammer: simplified interface for parser, lexer, and binary_reader These classes are now constructed with an interface adapter. This moves complexity from various places into the interface adapter class, or to some factories which now implement the different flavors of input. Furthermore, input adapters are kept in std::shared_ptr to avoid the need of manual deletion. 
--- src/json.hpp | 183 +++++------ test/src/unit-class_lexer.cpp | 69 +++-- test/src/unit-class_parser.cpp | 543 +++++++++++++++++---------------- 3 files changed, 409 insertions(+), 386 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index fbc996ad..20c5b2bc 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -7284,7 +7284,7 @@ class basic_json static basic_json parse(const CharT s, const parser_callback_t cb = nullptr) { - return parser(reinterpret_cast(s), cb).parse(true); + return parser(input_adapter::create(s), cb).parse(true); } /*! @@ -7319,7 +7319,7 @@ class basic_json static basic_json parse(std::istream& i, const parser_callback_t cb = nullptr) { - return parser(i, cb).parse(true); + return parser(input_adapter::create(i), cb).parse(true); } /*! @@ -7328,7 +7328,7 @@ class basic_json static basic_json parse(std::istream&& i, const parser_callback_t cb = nullptr) { - return parser(i, cb).parse(true); + return parser(input_adapter::create(i), cb).parse(true); } /*! @@ -7383,27 +7383,7 @@ class basic_json static basic_json parse(IteratorType first, IteratorType last, const parser_callback_t cb = nullptr) { - // assertion to check that the iterator range is indeed contiguous, - // see http://stackoverflow.com/a/35008842/266378 for more discussion - assert(std::accumulate(first, last, std::pair(true, 0), - [&first](std::pair res, decltype(*first) val) - { - res.first &= (val == *(std::next(std::addressof(*first), res.second++))); - return res; - }).first); - - // assertion to check that each element is 1 byte long - static_assert(sizeof(typename std::iterator_traits::value_type) == 1, - "each element in the iterator range must have the size of 1 byte"); - - // if iterator range is empty, create a parser with an empty string - // to generate "unexpected EOF" error message - if (std::distance(first, last) <= 0) - { - return parser("").parse(true); - } - - return parser(first, last, cb).parse(true); + return parser(input_adapter::create(first, last), 
cb).parse(true); } /*! @@ -7473,7 +7453,7 @@ class basic_json JSON_DEPRECATED friend std::istream& operator<<(basic_json& j, std::istream& i) { - j = parser(i).parse(true); + j = parser(input_adapter::create(i)).parse(true); return i; } @@ -7505,7 +7485,7 @@ class basic_json */ friend std::istream& operator>>(std::istream& i, basic_json& j) { - j = parser(i).parse(true); + j = parser(input_adapter::create(i)).parse(true); return i; } @@ -8566,6 +8546,84 @@ class basic_json virtual int get_character() = 0; virtual std::string read(size_t offset, size_t length) = 0; virtual ~input_adapter() {} + + // native support + + /// input adapter for input stream + static std::shared_ptr create(std::istream& i, const size_t buffer_size = 16384) + { + return std::shared_ptr(new cached_input_stream_adapter(i, buffer_size)); + } + + /// input adapter for input stream + static std::shared_ptr create(std::istream&& i, const size_t buffer_size = 16384) + { + return std::shared_ptr(new cached_input_stream_adapter(i, buffer_size)); + } + + /// input adapter for buffer + static std::shared_ptr create(const char* b, size_t l) + { + return std::shared_ptr(new input_buffer_adapter(b, l)); + } + + // derived support + + /// input adapter for string literal + template::value and + std::is_integral::type>::value and + sizeof(typename std::remove_pointer::type) == 1, int>::type = 0> + static std::shared_ptr create(CharT b) + { + return create(reinterpret_cast(b), + std::strlen(reinterpret_cast(b))); + } + + /// input adapter for iterator range with contiguous storage + template::iterator_category, std::random_access_iterator_tag>::value + , int>::type + = 0> + static std::shared_ptr create(IteratorType first, IteratorType last) + { + // assertion to check that the iterator range is indeed contiguous, + // see http://stackoverflow.com/a/35008842/266378 for more discussion + assert(std::accumulate(first, last, std::pair(true, 0), + [&first](std::pair res, decltype(*first) val) + { + res.first 
&= (val == *(std::next(std::addressof(*first), res.second++))); + return res; + }).first); + + // assertion to check that each element is 1 byte long + static_assert(sizeof(typename std::iterator_traits::value_type) == 1, + "each element in the iterator range must have the size of 1 byte"); + + return create(reinterpret_cast(&(*first)), + static_cast(std::distance(first, last))); + } + + /// input adapter for array + template + static std::shared_ptr create(T (&array)[N]) + { + // delegate the call to the iterator-range overload + return create(std::begin(array), std::end(array)); + } + + /// input adapter for contiguous container + template::value and + std::is_base_of< + std::random_access_iterator_tag, + typename std::iterator_traits()))>::iterator_category>::value + , int>::type = 0> + static std::shared_ptr create(const ContiguousContainer& c) + { + // delegate the call to the iterator-range overload + return create(std::begin(c), std::end(c)); + } }; /// input adapter for cached stream input @@ -8725,25 +8783,10 @@ class basic_json class binary_reader { public: - explicit binary_reader(std::istream& i) - : ia(new cached_input_stream_adapter(i, 16384)), - is_little_endian(little_endianess()) + explicit binary_reader(std::shared_ptr a) + : ia(a), is_little_endian(little_endianess()) {} - binary_reader(const char* buff, const size_t len) - : ia(new input_buffer_adapter(buff, len)), - is_little_endian(little_endianess()) - {} - - ~binary_reader() - { - delete ia; - } - - // switch off unwanted functions (due to pointer members) - binary_reader(const binary_reader&) = delete; - binary_reader operator=(const binary_reader&) = delete; - /*! 
@param[in] get_char whether a new character should be retrieved from the input (true, default) or whether the last @@ -9764,7 +9807,7 @@ class basic_json private: /// input adapter - input_adapter* ia = nullptr; + std::shared_ptr ia = nullptr; /// the current character int current = std::char_traits::eof(); @@ -10560,7 +10603,7 @@ class basic_json static basic_json from_cbor(const std::vector& v, const size_t start_index = 0) { - binary_reader br(reinterpret_cast(v.data() + start_index), v.size() - start_index); + binary_reader br(input_adapter::create(v.begin() + static_cast(start_index), v.end())); return br.parse_cbor(); } @@ -10635,7 +10678,7 @@ class basic_json static basic_json from_msgpack(const std::vector& v, const size_t start_index = 0) { - binary_reader br(reinterpret_cast(v.data() + start_index), v.size() - start_index); + binary_reader br(input_adapter::create(v.begin() + static_cast(start_index), v.end())); return br.parse_msgpack(); } @@ -10718,26 +10761,10 @@ class basic_json } } - explicit lexer(std::istream& i) - : ia(new cached_input_stream_adapter(i, 16384)), - decimal_point_char(get_decimal_point()) + explicit lexer(std::shared_ptr a) + : ia(a), decimal_point_char(get_decimal_point()) {} - lexer(const char* buff, const size_t len) - : ia(new input_buffer_adapter(buff, len)), - decimal_point_char(get_decimal_point()) - {} - - ~lexer() - { - delete ia; - } - - // switch off unwanted functions (due to pointer members) - lexer() = delete; - lexer(const lexer&) = delete; - lexer operator=(const lexer&) = delete; - private: ///////////////////// // locales @@ -12091,7 +12118,7 @@ scan_number_done: private: /// input adapter - input_adapter* ia = nullptr; + std::shared_ptr ia = nullptr; /// the current character int current = std::char_traits::eof(); @@ -12129,28 +12156,10 @@ scan_number_done: class parser { public: - /// a parser reading from a string literal - parser(const char* buff, const parser_callback_t cb = nullptr) - : callback(cb), 
m_lexer(buff, std::strlen(buff)) - {} - - /*! - @brief a parser reading from an input stream - @throw parse_error.111 if input stream is in a bad state - */ - parser(std::istream& is, const parser_callback_t cb = nullptr) - : callback(cb), m_lexer(is) - {} - - /// a parser reading from an iterator range with contiguous storage - template::iterator_category, std::random_access_iterator_tag>::value - , int>::type - = 0> - parser(IteratorType first, IteratorType last, const parser_callback_t cb = nullptr) - : callback(cb), - m_lexer(reinterpret_cast(&(*first)), - static_cast(std::distance(first, last))) + /// a parser reading from an input adapter + explicit parser(std::shared_ptr ia, + const parser_callback_t cb = nullptr) + : callback(cb), m_lexer(ia) {} /*! diff --git a/test/src/unit-class_lexer.cpp b/test/src/unit-class_lexer.cpp index 2acea176..071337b5 100644 --- a/test/src/unit-class_lexer.cpp +++ b/test/src/unit-class_lexer.cpp @@ -32,56 +32,63 @@ SOFTWARE. #include "json.hpp" using nlohmann::json; +// shortcut to scan a string literal +json::lexer::token_type scan_string(const char* s); +json::lexer::token_type scan_string(const char* s) +{ + return json::lexer(json::input_adapter::create(s)).scan(); +} + TEST_CASE("lexer class") { SECTION("scan") { SECTION("structural characters") { - CHECK((json::lexer("[", 1).scan() == json::lexer::token_type::begin_array)); - CHECK((json::lexer("]", 1).scan() == json::lexer::token_type::end_array)); - CHECK((json::lexer("{", 1).scan() == json::lexer::token_type::begin_object)); - CHECK((json::lexer("}", 1).scan() == json::lexer::token_type::end_object)); - CHECK((json::lexer(",", 1).scan() == json::lexer::token_type::value_separator)); - CHECK((json::lexer(":", 1).scan() == json::lexer::token_type::name_separator)); + CHECK((scan_string("[") == json::lexer::token_type::begin_array)); + CHECK((scan_string("]") == json::lexer::token_type::end_array)); + CHECK((scan_string("{") == json::lexer::token_type::begin_object)); + 
CHECK((scan_string("}") == json::lexer::token_type::end_object)); + CHECK((scan_string(",") == json::lexer::token_type::value_separator)); + CHECK((scan_string(":") == json::lexer::token_type::name_separator)); } SECTION("literal names") { - CHECK((json::lexer("null", 4).scan() == json::lexer::token_type::literal_null)); - CHECK((json::lexer("true", 4).scan() == json::lexer::token_type::literal_true)); - CHECK((json::lexer("false", 5).scan() == json::lexer::token_type::literal_false)); + CHECK((scan_string("null") == json::lexer::token_type::literal_null)); + CHECK((scan_string("true") == json::lexer::token_type::literal_true)); + CHECK((scan_string("false") == json::lexer::token_type::literal_false)); } SECTION("numbers") { - CHECK((json::lexer("0", 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer("1", 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer("2", 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer("3", 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer("4", 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer("5", 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer("6", 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer("7", 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer("8", 1).scan() == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer("9", 1).scan() == json::lexer::token_type::value_unsigned)); + CHECK((scan_string("0") == json::lexer::token_type::value_unsigned)); + CHECK((scan_string("1") == json::lexer::token_type::value_unsigned)); + CHECK((scan_string("2") == json::lexer::token_type::value_unsigned)); + CHECK((scan_string("3") == json::lexer::token_type::value_unsigned)); + CHECK((scan_string("4") == json::lexer::token_type::value_unsigned)); + CHECK((scan_string("5") == json::lexer::token_type::value_unsigned)); + 
CHECK((scan_string("6") == json::lexer::token_type::value_unsigned)); + CHECK((scan_string("7") == json::lexer::token_type::value_unsigned)); + CHECK((scan_string("8") == json::lexer::token_type::value_unsigned)); + CHECK((scan_string("9") == json::lexer::token_type::value_unsigned)); - CHECK((json::lexer("-0", 2).scan() == json::lexer::token_type::value_integer)); - CHECK((json::lexer("-1", 2).scan() == json::lexer::token_type::value_integer)); + CHECK((scan_string("-0") == json::lexer::token_type::value_integer)); + CHECK((scan_string("-1") == json::lexer::token_type::value_integer)); - CHECK((json::lexer("1.1", 3).scan() == json::lexer::token_type::value_float)); - CHECK((json::lexer("-1.1", 4).scan() == json::lexer::token_type::value_float)); - CHECK((json::lexer("1E10", 4).scan() == json::lexer::token_type::value_float)); + CHECK((scan_string("1.1") == json::lexer::token_type::value_float)); + CHECK((scan_string("-1.1") == json::lexer::token_type::value_float)); + CHECK((scan_string("1E10") == json::lexer::token_type::value_float)); } SECTION("whitespace") { // result is end_of_input, because not token is following - CHECK((json::lexer(" ", 1).scan() == json::lexer::token_type::end_of_input)); - CHECK((json::lexer("\t", 1).scan() == json::lexer::token_type::end_of_input)); - CHECK((json::lexer("\n", 1).scan() == json::lexer::token_type::end_of_input)); - CHECK((json::lexer("\r", 1).scan() == json::lexer::token_type::end_of_input)); - CHECK((json::lexer(" \t\n\r\n\t ", 7).scan() == json::lexer::token_type::end_of_input)); + CHECK((scan_string(" ") == json::lexer::token_type::end_of_input)); + CHECK((scan_string("\t") == json::lexer::token_type::end_of_input)); + CHECK((scan_string("\n") == json::lexer::token_type::end_of_input)); + CHECK((scan_string("\r") == json::lexer::token_type::end_of_input)); + CHECK((scan_string(" \t\n\r\n\t ") == json::lexer::token_type::end_of_input)); } } @@ -112,7 +119,7 @@ TEST_CASE("lexer class") // create string from the ASCII 
code const auto s = std::string(1, static_cast(c)); // store scan() result - const auto res = json::lexer(s.c_str(), 1).scan(); + const auto res = scan_string(s.c_str()); switch (c) { @@ -164,7 +171,7 @@ TEST_CASE("lexer class") std::string s("\""); s += std::string(2048, 'x'); s += "\""; - CHECK((json::lexer(s.c_str(), 2050).scan() == json::lexer::token_type::value_string)); + CHECK((scan_string(s.c_str()) == json::lexer::token_type::value_string)); } /* NOTE: to_unicode function has been removed diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index 864b7be1..e0fffac4 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -34,36 +34,43 @@ using nlohmann::json; #include +// shortcut to parse a string literal +json::parser parse_string(const char* s); +json::parser parse_string(const char* s) +{ + return json::parser(json::input_adapter::create(s)); +} + TEST_CASE("parser class") { SECTION("parse") { SECTION("null") { - CHECK(json::parser("null").parse() == json(nullptr)); + CHECK(parse_string("null").parse() == json(nullptr)); } SECTION("true") { - CHECK(json::parser("true").parse() == json(true)); + CHECK(parse_string("true").parse() == json(true)); } SECTION("false") { - CHECK(json::parser("false").parse() == json(false)); + CHECK(parse_string("false").parse() == json(false)); } SECTION("array") { SECTION("empty array") { - CHECK(json::parser("[]").parse() == json(json::value_t::array)); - CHECK(json::parser("[ ]").parse() == json(json::value_t::array)); + CHECK(parse_string("[]").parse() == json(json::value_t::array)); + CHECK(parse_string("[ ]").parse() == json(json::value_t::array)); } SECTION("nonempty array") { - CHECK(json::parser("[true, false, null]").parse() == json({true, false, nullptr})); + CHECK(parse_string("[true, false, null]").parse() == json({true, false, nullptr})); } } @@ -71,113 +78,113 @@ TEST_CASE("parser class") { SECTION("empty object") { - CHECK(json::parser("{}").parse() == 
json(json::value_t::object)); - CHECK(json::parser("{ }").parse() == json(json::value_t::object)); + CHECK(parse_string("{}").parse() == json(json::value_t::object)); + CHECK(parse_string("{ }").parse() == json(json::value_t::object)); } SECTION("nonempty object") { - CHECK(json::parser("{\"\": true, \"one\": 1, \"two\": null}").parse() == json({{"", true}, {"one", 1}, {"two", nullptr}})); + CHECK(parse_string("{\"\": true, \"one\": 1, \"two\": null}").parse() == json({{"", true}, {"one", 1}, {"two", nullptr}})); } } SECTION("string") { // empty string - CHECK(json::parser("\"\"").parse() == json(json::value_t::string)); + CHECK(parse_string("\"\"").parse() == json(json::value_t::string)); SECTION("errors") { // error: tab in string - CHECK_THROWS_AS(json::parser("\"\t\"").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("\"\t\"").parse(), + CHECK_THROWS_AS(parse_string("\"\t\"").parse(), json::parse_error); + CHECK_THROWS_WITH(parse_string("\"\t\"").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"'"); // error: newline in string - CHECK_THROWS_AS(json::parser("\"\n\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\r\"").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("\"\n\"").parse(), + CHECK_THROWS_AS(parse_string("\"\n\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\r\"").parse(), json::parse_error); + CHECK_THROWS_WITH(parse_string("\"\n\"").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"'"); - CHECK_THROWS_WITH(json::parser("\"\r\"").parse(), + CHECK_THROWS_WITH(parse_string("\"\r\"").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"'"); // error: 
backspace in string - CHECK_THROWS_AS(json::parser("\"\b\"").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("\"\b\"").parse(), + CHECK_THROWS_AS(parse_string("\"\b\"").parse(), json::parse_error); + CHECK_THROWS_WITH(parse_string("\"\b\"").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"'"); // improve code coverage - CHECK_THROWS_AS(json::parser("\uFF01").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("[-4:1,]").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\uFF01").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("[-4:1,]").parse(), json::parse_error); // unescaped control characters - CHECK_THROWS_AS(json::parser("\"\x00\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x01\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x02\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x03\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x04\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x05\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x06\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x07\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x08\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x09\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x0a\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x0b\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x0c\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x0d\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x0e\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x0f\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x10\"").parse(), json::parse_error); - 
CHECK_THROWS_AS(json::parser("\"\x11\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x12\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x13\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x14\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x15\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x16\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x17\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x18\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x19\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x1a\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x1b\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x1c\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x1d\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x1e\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\x1f\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x00\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x01\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x02\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x03\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x04\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x05\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x06\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x07\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x08\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x09\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x0a\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x0b\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x0c\"").parse(), 
json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x0d\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x0e\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x0f\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x10\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x11\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x12\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x13\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x14\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x15\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x16\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x17\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x18\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x19\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x1a\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x1b\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x1c\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x1d\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x1e\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\x1f\"").parse(), json::parse_error); } SECTION("escaped") { // quotation mark "\"" auto r1 = R"("\"")"_json; - CHECK(json::parser("\"\\\"\"").parse() == r1); + CHECK(parse_string("\"\\\"\"").parse() == r1); // reverse solidus "\\" auto r2 = R"("\\")"_json; - CHECK(json::parser("\"\\\\\"").parse() == r2); + CHECK(parse_string("\"\\\\\"").parse() == r2); // solidus - CHECK(json::parser("\"\\/\"").parse() == R"("/")"_json); + CHECK(parse_string("\"\\/\"").parse() == R"("/")"_json); // backspace - CHECK(json::parser("\"\\b\"").parse() == json("\b")); + CHECK(parse_string("\"\\b\"").parse() == json("\b")); // formfeed - 
CHECK(json::parser("\"\\f\"").parse() == json("\f")); + CHECK(parse_string("\"\\f\"").parse() == json("\f")); // newline - CHECK(json::parser("\"\\n\"").parse() == json("\n")); + CHECK(parse_string("\"\\n\"").parse() == json("\n")); // carriage return - CHECK(json::parser("\"\\r\"").parse() == json("\r")); + CHECK(parse_string("\"\\r\"").parse() == json("\r")); // horizontal tab - CHECK(json::parser("\"\\t\"").parse() == json("\t")); + CHECK(parse_string("\"\\t\"").parse() == json("\t")); - CHECK(json::parser("\"\\u0001\"").parse().get() == "\x01"); - CHECK(json::parser("\"\\u000a\"").parse().get() == "\n"); - CHECK(json::parser("\"\\u00b0\"").parse().get() == "°"); - CHECK(json::parser("\"\\u0c00\"").parse().get() == "ఀ"); - CHECK(json::parser("\"\\ud000\"").parse().get() == "퀀"); - CHECK(json::parser("\"\\u000E\"").parse().get() == "\x0E"); - CHECK(json::parser("\"\\u00F0\"").parse().get() == "ð"); - CHECK(json::parser("\"\\u0100\"").parse().get() == "Ā"); - CHECK(json::parser("\"\\u2000\"").parse().get() == " "); - CHECK(json::parser("\"\\uFFFF\"").parse().get() == "￿"); - CHECK(json::parser("\"\\u20AC\"").parse().get() == "€"); - CHECK(json::parser("\"€\"").parse().get() == "€"); - CHECK(json::parser("\"🎈\"").parse().get() == "🎈"); + CHECK(parse_string("\"\\u0001\"").parse().get() == "\x01"); + CHECK(parse_string("\"\\u000a\"").parse().get() == "\n"); + CHECK(parse_string("\"\\u00b0\"").parse().get() == "°"); + CHECK(parse_string("\"\\u0c00\"").parse().get() == "ఀ"); + CHECK(parse_string("\"\\ud000\"").parse().get() == "퀀"); + CHECK(parse_string("\"\\u000E\"").parse().get() == "\x0E"); + CHECK(parse_string("\"\\u00F0\"").parse().get() == "ð"); + CHECK(parse_string("\"\\u0100\"").parse().get() == "Ā"); + CHECK(parse_string("\"\\u2000\"").parse().get() == " "); + CHECK(parse_string("\"\\uFFFF\"").parse().get() == "￿"); + CHECK(parse_string("\"\\u20AC\"").parse().get() == "€"); + CHECK(parse_string("\"€\"").parse().get() == "€"); + 
CHECK(parse_string("\"🎈\"").parse().get() == "🎈"); - CHECK(json::parse("\"\\ud80c\\udc60\"").get() == u8"\U00013060"); - CHECK(json::parse("\"\\ud83c\\udf1e\"").get() == "🌞"); + CHECK(parse_string("\"\\ud80c\\udc60\"").parse().get() == u8"\U00013060"); + CHECK(parse_string("\"\\ud83c\\udf1e\"").parse().get() == "🌞"); } } @@ -187,40 +194,40 @@ TEST_CASE("parser class") { SECTION("without exponent") { - CHECK(json::parser("-128").parse() == json(-128)); - CHECK(json::parser("-0").parse() == json(-0)); - CHECK(json::parser("0").parse() == json(0)); - CHECK(json::parser("128").parse() == json(128)); + CHECK(parse_string("-128").parse() == json(-128)); + CHECK(parse_string("-0").parse() == json(-0)); + CHECK(parse_string("0").parse() == json(0)); + CHECK(parse_string("128").parse() == json(128)); } SECTION("with exponent") { - CHECK(json::parser("0e1").parse() == json(0e1)); - CHECK(json::parser("0E1").parse() == json(0e1)); + CHECK(parse_string("0e1").parse() == json(0e1)); + CHECK(parse_string("0E1").parse() == json(0e1)); - CHECK(json::parser("10000E-4").parse() == json(10000e-4)); - CHECK(json::parser("10000E-3").parse() == json(10000e-3)); - CHECK(json::parser("10000E-2").parse() == json(10000e-2)); - CHECK(json::parser("10000E-1").parse() == json(10000e-1)); - CHECK(json::parser("10000E0").parse() == json(10000e0)); - CHECK(json::parser("10000E1").parse() == json(10000e1)); - CHECK(json::parser("10000E2").parse() == json(10000e2)); - CHECK(json::parser("10000E3").parse() == json(10000e3)); - CHECK(json::parser("10000E4").parse() == json(10000e4)); + CHECK(parse_string("10000E-4").parse() == json(10000e-4)); + CHECK(parse_string("10000E-3").parse() == json(10000e-3)); + CHECK(parse_string("10000E-2").parse() == json(10000e-2)); + CHECK(parse_string("10000E-1").parse() == json(10000e-1)); + CHECK(parse_string("10000E0").parse() == json(10000e0)); + CHECK(parse_string("10000E1").parse() == json(10000e1)); + CHECK(parse_string("10000E2").parse() == json(10000e2)); + 
CHECK(parse_string("10000E3").parse() == json(10000e3)); + CHECK(parse_string("10000E4").parse() == json(10000e4)); - CHECK(json::parser("10000e-4").parse() == json(10000e-4)); - CHECK(json::parser("10000e-3").parse() == json(10000e-3)); - CHECK(json::parser("10000e-2").parse() == json(10000e-2)); - CHECK(json::parser("10000e-1").parse() == json(10000e-1)); - CHECK(json::parser("10000e0").parse() == json(10000e0)); - CHECK(json::parser("10000e1").parse() == json(10000e1)); - CHECK(json::parser("10000e2").parse() == json(10000e2)); - CHECK(json::parser("10000e3").parse() == json(10000e3)); - CHECK(json::parser("10000e4").parse() == json(10000e4)); + CHECK(parse_string("10000e-4").parse() == json(10000e-4)); + CHECK(parse_string("10000e-3").parse() == json(10000e-3)); + CHECK(parse_string("10000e-2").parse() == json(10000e-2)); + CHECK(parse_string("10000e-1").parse() == json(10000e-1)); + CHECK(parse_string("10000e0").parse() == json(10000e0)); + CHECK(parse_string("10000e1").parse() == json(10000e1)); + CHECK(parse_string("10000e2").parse() == json(10000e2)); + CHECK(parse_string("10000e3").parse() == json(10000e3)); + CHECK(parse_string("10000e4").parse() == json(10000e4)); - CHECK(json::parser("-0e1").parse() == json(-0e1)); - CHECK(json::parser("-0E1").parse() == json(-0e1)); - CHECK(json::parser("-0E123").parse() == json(-0e123)); + CHECK(parse_string("-0e1").parse() == json(-0e1)); + CHECK(parse_string("-0E1").parse() == json(-0e1)); + CHECK(parse_string("-0E123").parse() == json(-0e123)); } SECTION("edge cases") @@ -232,9 +239,9 @@ TEST_CASE("parser class") // agree exactly on their numeric values. 
// -(2**53)+1 - CHECK(json::parser("-9007199254740991").parse().get() == -9007199254740991); + CHECK(parse_string("-9007199254740991").parse().get() == -9007199254740991); // (2**53)-1 - CHECK(json::parser("9007199254740991").parse().get() == 9007199254740991); + CHECK(parse_string("9007199254740991").parse().get() == 9007199254740991); } SECTION("over the edge cases") // issue #178 - Integer conversion to unsigned (incorrect handling of 64 bit integers) @@ -247,11 +254,11 @@ TEST_CASE("parser class") // i.e. -(2**63) -> (2**64)-1. // -(2**63) ** Note: compilers see negative literals as negated positive numbers (hence the -1)) - CHECK(json::parser("-9223372036854775808").parse().get() == -9223372036854775807 - 1); + CHECK(parse_string("-9223372036854775808").parse().get() == -9223372036854775807 - 1); // (2**63)-1 - CHECK(json::parser("9223372036854775807").parse().get() == 9223372036854775807); + CHECK(parse_string("9223372036854775807").parse().get() == 9223372036854775807); // (2**64)-1 - CHECK(json::parser("18446744073709551615").parse().get() == 18446744073709551615u); + CHECK(parse_string("18446744073709551615").parse().get() == 18446744073709551615u); } } @@ -259,85 +266,85 @@ TEST_CASE("parser class") { SECTION("without exponent") { - CHECK(json::parser("-128.5").parse() == json(-128.5)); - CHECK(json::parser("0.999").parse() == json(0.999)); - CHECK(json::parser("128.5").parse() == json(128.5)); - CHECK(json::parser("-0.0").parse() == json(-0.0)); + CHECK(parse_string("-128.5").parse() == json(-128.5)); + CHECK(parse_string("0.999").parse() == json(0.999)); + CHECK(parse_string("128.5").parse() == json(128.5)); + CHECK(parse_string("-0.0").parse() == json(-0.0)); } SECTION("with exponent") { - CHECK(json::parser("-128.5E3").parse() == json(-128.5E3)); - CHECK(json::parser("-128.5E-3").parse() == json(-128.5E-3)); - CHECK(json::parser("-0.0e1").parse() == json(-0.0e1)); - CHECK(json::parser("-0.0E1").parse() == json(-0.0e1)); + 
CHECK(parse_string("-128.5E3").parse() == json(-128.5E3)); + CHECK(parse_string("-128.5E-3").parse() == json(-128.5E-3)); + CHECK(parse_string("-0.0e1").parse() == json(-0.0e1)); + CHECK(parse_string("-0.0E1").parse() == json(-0.0e1)); } } SECTION("overflow") { // overflows during parsing yield an exception - CHECK_THROWS_AS(json::parser("1.18973e+4932").parse() == json(), json::out_of_range); - CHECK_THROWS_WITH(json::parser("1.18973e+4932").parse() == json(), + CHECK_THROWS_AS(parse_string("1.18973e+4932").parse() == json(), json::out_of_range); + CHECK_THROWS_WITH(parse_string("1.18973e+4932").parse() == json(), "[json.exception.out_of_range.406] number overflow parsing '1.18973e+4932'"); } SECTION("invalid numbers") { - CHECK_THROWS_AS(json::parser("01").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("--1").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("1.").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("1E").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("1E-").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("1.E1").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("-1E").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("-0E#").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("-0E-#").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("-0#").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("-0.0:").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("-0.0Z").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("-0E123:").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("-0e0-:").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("-0e-:").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("-0f").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("01").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("--1").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("1.").parse(), 
json::parse_error); + CHECK_THROWS_AS(parse_string("1E").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("1E-").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("1.E1").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("-1E").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("-0E#").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("-0E-#").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("-0#").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("-0.0:").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("-0.0Z").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("-0E123:").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("-0e0-:").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("-0e-:").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("-0f").parse(), json::parse_error); // numbers must not begin with "+" - CHECK_THROWS_AS(json::parser("+1").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("+0").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("+1").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("+0").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("01").parse(), + CHECK_THROWS_WITH(parse_string("01").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected number literal; expected end of input"); - CHECK_THROWS_WITH(json::parser("-01").parse(), + CHECK_THROWS_WITH(parse_string("-01").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected number literal; expected end of input"); - CHECK_THROWS_WITH(json::parser("--1").parse(), + CHECK_THROWS_WITH(parse_string("--1").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid number; expected digit after '-'; last read '--'"); - CHECK_THROWS_WITH(json::parser("1.").parse(), + CHECK_THROWS_WITH(parse_string("1.").parse(), "[json.exception.parse_error.101] parse 
error at 3: syntax error - invalid number; expected digit after '.'; last read '1.'"); - CHECK_THROWS_WITH(json::parser("1E").parse(), + CHECK_THROWS_WITH(parse_string("1E").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected '+', '-', or digit after exponent; last read '1E'"); - CHECK_THROWS_WITH(json::parser("1E-").parse(), + CHECK_THROWS_WITH(parse_string("1E-").parse(), "[json.exception.parse_error.101] parse error at 4: syntax error - invalid number; expected digit after exponent sign; last read '1E-'"); - CHECK_THROWS_WITH(json::parser("1.E1").parse(), + CHECK_THROWS_WITH(parse_string("1.E1").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected digit after '.'; last read '1.E'"); - CHECK_THROWS_WITH(json::parser("-1E").parse(), + CHECK_THROWS_WITH(parse_string("-1E").parse(), "[json.exception.parse_error.101] parse error at 4: syntax error - invalid number; expected '+', '-', or digit after exponent; last read '-1E'"); - CHECK_THROWS_WITH(json::parser("-0E#").parse(), + CHECK_THROWS_WITH(parse_string("-0E#").parse(), "[json.exception.parse_error.101] parse error at 4: syntax error - invalid number; expected '+', '-', or digit after exponent; last read '-0E#'"); - CHECK_THROWS_WITH(json::parser("-0E-#").parse(), + CHECK_THROWS_WITH(parse_string("-0E-#").parse(), "[json.exception.parse_error.101] parse error at 5: syntax error - invalid number; expected digit after exponent sign; last read '-0E-#'"); - CHECK_THROWS_WITH(json::parser("-0#").parse(), + CHECK_THROWS_WITH(parse_string("-0#").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - invalid literal; last read: '-0#'; expected end of input"); - CHECK_THROWS_WITH(json::parser("-0.0:").parse(), + CHECK_THROWS_WITH(parse_string("-0.0:").parse(), "[json.exception.parse_error.101] parse error at 5: syntax error - unexpected ':'; expected end of input"); - 
CHECK_THROWS_WITH(json::parser("-0.0Z").parse(), + CHECK_THROWS_WITH(parse_string("-0.0Z").parse(), "[json.exception.parse_error.101] parse error at 5: syntax error - invalid literal; last read: '-0.0Z'; expected end of input"); - CHECK_THROWS_WITH(json::parser("-0E123:").parse(), + CHECK_THROWS_WITH(parse_string("-0E123:").parse(), "[json.exception.parse_error.101] parse error at 7: syntax error - unexpected ':'; expected end of input"); - CHECK_THROWS_WITH(json::parser("-0e0-:").parse(), + CHECK_THROWS_WITH(parse_string("-0e0-:").parse(), "[json.exception.parse_error.101] parse error at 6: syntax error - invalid number; expected digit after '-'; last read: '-:'; expected end of input"); - CHECK_THROWS_WITH(json::parser("-0e-:").parse(), + CHECK_THROWS_WITH(parse_string("-0e-:").parse(), "[json.exception.parse_error.101] parse error at 5: syntax error - invalid number; expected digit after exponent sign; last read '-0e-:'"); - CHECK_THROWS_WITH(json::parser("-0f").parse(), + CHECK_THROWS_WITH(parse_string("-0f").parse(), "[json.exception.parse_error.101] parse error at 4: syntax error - invalid literal; expected 'false'; last read: '-0f'; expected end of input"); } } @@ -346,152 +353,152 @@ TEST_CASE("parser class") SECTION("parse errors") { // unexpected end of number - CHECK_THROWS_AS(json::parser("0.").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("-").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("--").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("-0.").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("-.").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("-:").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("0.:").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("e.").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("1e.").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("1e/").parse(), json::parse_error); - 
CHECK_THROWS_AS(json::parser("1e:").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("1E.").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("1E/").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("1E:").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("0.").parse(), + CHECK_THROWS_AS(parse_string("0.").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("-").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("--").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("-0.").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("-.").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("-:").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("0.:").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("e.").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("1e.").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("1e/").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("1e:").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("1E.").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("1E/").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("1E:").parse(), json::parse_error); + CHECK_THROWS_WITH(parse_string("0.").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected digit after '.'; last read '0.'"); - CHECK_THROWS_WITH(json::parser("-").parse(), + CHECK_THROWS_WITH(parse_string("-").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid number; expected digit after '-'; last read '-'"); - CHECK_THROWS_WITH(json::parser("--").parse(), + CHECK_THROWS_WITH(parse_string("--").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid number; expected digit after '-'; last read '--'"); - CHECK_THROWS_WITH(json::parser("-0.").parse(), + CHECK_THROWS_WITH(parse_string("-0.").parse(), "[json.exception.parse_error.101] 
parse error at 4: syntax error - invalid number; expected digit after '.'; last read '-0.'"); - CHECK_THROWS_WITH(json::parser("-.").parse(), + CHECK_THROWS_WITH(parse_string("-.").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid number; expected digit after '-'; last read '-.'"); - CHECK_THROWS_WITH(json::parser("-:").parse(), + CHECK_THROWS_WITH(parse_string("-:").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid number; expected digit after '-'; last read '-:'"); - CHECK_THROWS_WITH(json::parser("0.:").parse(), + CHECK_THROWS_WITH(parse_string("0.:").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected digit after '.'; last read '0.:'"); - CHECK_THROWS_WITH(json::parser("e.").parse(), + CHECK_THROWS_WITH(parse_string("e.").parse(), "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read 'e'"); - CHECK_THROWS_WITH(json::parser("1e.").parse(), + CHECK_THROWS_WITH(parse_string("1e.").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected '+', '-', or digit after exponent; last read '1e.'"); - CHECK_THROWS_WITH(json::parser("1e/").parse(), + CHECK_THROWS_WITH(parse_string("1e/").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected '+', '-', or digit after exponent; last read '1e/'"); - CHECK_THROWS_WITH(json::parser("1e:").parse(), + CHECK_THROWS_WITH(parse_string("1e:").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected '+', '-', or digit after exponent; last read '1e:'"); - CHECK_THROWS_WITH(json::parser("1E.").parse(), + CHECK_THROWS_WITH(parse_string("1E.").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected '+', '-', or digit after exponent; last read '1E.'"); - CHECK_THROWS_WITH(json::parser("1E/").parse(), + 
CHECK_THROWS_WITH(parse_string("1E/").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected '+', '-', or digit after exponent; last read '1E/'"); - CHECK_THROWS_WITH(json::parser("1E:").parse(), + CHECK_THROWS_WITH(parse_string("1E:").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - invalid number; expected '+', '-', or digit after exponent; last read '1E:'"); // unexpected end of null - CHECK_THROWS_AS(json::parser("n").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("nu").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("nul").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("n").parse(), + CHECK_THROWS_AS(parse_string("n").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("nu").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("nul").parse(), json::parse_error); + CHECK_THROWS_WITH(parse_string("n").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid literal; expected 'null'; last read 'n'"); - CHECK_THROWS_WITH(json::parser("nu").parse(), + CHECK_THROWS_WITH(parse_string("nu").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - invalid literal; expected 'null'; last read 'nu'"); - CHECK_THROWS_WITH(json::parser("nul").parse(), + CHECK_THROWS_WITH(parse_string("nul").parse(), "[json.exception.parse_error.101] parse error at 4: syntax error - invalid literal; expected 'null'; last read 'nul'"); // unexpected end of true - CHECK_THROWS_AS(json::parser("t").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("tr").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("tru").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("t").parse(), + CHECK_THROWS_AS(parse_string("t").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("tr").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("tru").parse(), json::parse_error); + 
CHECK_THROWS_WITH(parse_string("t").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid literal; expected 'true'; last read 't'"); - CHECK_THROWS_WITH(json::parser("tr").parse(), + CHECK_THROWS_WITH(parse_string("tr").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - invalid literal; expected 'true'; last read 'tr'"); - CHECK_THROWS_WITH(json::parser("tru").parse(), + CHECK_THROWS_WITH(parse_string("tru").parse(), "[json.exception.parse_error.101] parse error at 4: syntax error - invalid literal; expected 'true'; last read 'tru'"); // unexpected end of false - CHECK_THROWS_AS(json::parser("f").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("fa").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("fal").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("fals").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("f").parse(), + CHECK_THROWS_AS(parse_string("f").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("fa").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("fal").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("fals").parse(), json::parse_error); + CHECK_THROWS_WITH(parse_string("f").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid literal; expected 'false'; last read 'f'"); - CHECK_THROWS_WITH(json::parser("fa").parse(), + CHECK_THROWS_WITH(parse_string("fa").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - invalid literal; expected 'false'; last read 'fa'"); - CHECK_THROWS_WITH(json::parser("fal").parse(), + CHECK_THROWS_WITH(parse_string("fal").parse(), "[json.exception.parse_error.101] parse error at 4: syntax error - invalid literal; expected 'false'; last read 'fal'"); - CHECK_THROWS_WITH(json::parser("fals").parse(), + CHECK_THROWS_WITH(parse_string("fals").parse(), "[json.exception.parse_error.101] parse error at 5: syntax error - invalid literal; expected 
'false'; last read 'fals'"); // missing/unexpected end of array - CHECK_THROWS_AS(json::parser("[").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("[1").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("[1,").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("[1,]").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("]").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("[").parse(), + CHECK_THROWS_AS(parse_string("[").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("[1").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("[1,").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("[1,]").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("]").parse(), json::parse_error); + CHECK_THROWS_WITH(parse_string("[").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected end of input"); - CHECK_THROWS_WITH(json::parser("[1").parse(), + CHECK_THROWS_WITH(parse_string("[1").parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - unexpected end of input; expected ']'"); - CHECK_THROWS_WITH(json::parser("[1,").parse(), + CHECK_THROWS_WITH(parse_string("[1,").parse(), "[json.exception.parse_error.101] parse error at 4: syntax error - unexpected end of input"); - CHECK_THROWS_WITH(json::parser("[1,]").parse(), + CHECK_THROWS_WITH(parse_string("[1,]").parse(), "[json.exception.parse_error.101] parse error at 4: syntax error - unexpected ']'"); - CHECK_THROWS_WITH(json::parser("]").parse(), + CHECK_THROWS_WITH(parse_string("]").parse(), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected ']'"); // missing/unexpected end of object - CHECK_THROWS_AS(json::parser("{").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("{\"foo\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("{\"foo\":").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("{\"foo\":}").parse(), json::parse_error); - 
CHECK_THROWS_AS(json::parser("{\"foo\":1,}").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("}").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("{").parse(), + CHECK_THROWS_AS(parse_string("{").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("{\"foo\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("{\"foo\":").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("{\"foo\":}").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("{\"foo\":1,}").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("}").parse(), json::parse_error); + CHECK_THROWS_WITH(parse_string("{").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected end of input; expected string literal"); - CHECK_THROWS_WITH(json::parser("{\"foo\"").parse(), + CHECK_THROWS_WITH(parse_string("{\"foo\"").parse(), "[json.exception.parse_error.101] parse error at 7: syntax error - unexpected end of input; expected ':'"); - CHECK_THROWS_WITH(json::parser("{\"foo\":").parse(), + CHECK_THROWS_WITH(parse_string("{\"foo\":").parse(), "[json.exception.parse_error.101] parse error at 8: syntax error - unexpected end of input"); - CHECK_THROWS_WITH(json::parser("{\"foo\":}").parse(), + CHECK_THROWS_WITH(parse_string("{\"foo\":}").parse(), "[json.exception.parse_error.101] parse error at 8: syntax error - unexpected '}'"); - CHECK_THROWS_WITH(json::parser("{\"foo\":1,}").parse(), + CHECK_THROWS_WITH(parse_string("{\"foo\":1,}").parse(), "[json.exception.parse_error.101] parse error at 10: syntax error - unexpected '}'; expected string literal"); - CHECK_THROWS_WITH(json::parser("}").parse(), + CHECK_THROWS_WITH(parse_string("}").parse(), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '}'"); // missing/unexpected end of string - CHECK_THROWS_AS(json::parser("\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\\\"").parse(), json::parse_error); - 
CHECK_THROWS_AS(json::parser("\"\\u\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\\u0\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\\u01\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\\u012\"").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\\u").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\\u0").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\\u01").parse(), json::parse_error); - CHECK_THROWS_AS(json::parser("\"\\u012").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("\"").parse(), + CHECK_THROWS_AS(parse_string("\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\\\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\\u\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\\u0\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\\u01\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\\u012\"").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\\u").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\\u0").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\\u01").parse(), json::parse_error); + CHECK_THROWS_AS(parse_string("\"\\u012").parse(), json::parse_error); + CHECK_THROWS_WITH(parse_string("\"").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: missing closing quote; last read '\"'"); - CHECK_THROWS_WITH(json::parser("\"\\\"").parse(), + CHECK_THROWS_WITH(parse_string("\"\\\"").parse(), "[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: missing closing quote; last read '\"\\\"'"); - CHECK_THROWS_WITH(json::parser("\"\\u\"").parse(), + CHECK_THROWS_WITH(parse_string("\"\\u\"").parse(), "[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u\"'"); - 
CHECK_THROWS_WITH(json::parser("\"\\u0\"").parse(), + CHECK_THROWS_WITH(parse_string("\"\\u0\"").parse(), "[json.exception.parse_error.101] parse error at 5: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u0\"'"); - CHECK_THROWS_WITH(json::parser("\"\\u01\"").parse(), + CHECK_THROWS_WITH(parse_string("\"\\u01\"").parse(), "[json.exception.parse_error.101] parse error at 6: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u01\"'"); - CHECK_THROWS_WITH(json::parser("\"\\u012\"").parse(), + CHECK_THROWS_WITH(parse_string("\"\\u012\"").parse(), "[json.exception.parse_error.101] parse error at 7: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u012\"'"); - CHECK_THROWS_WITH(json::parser("\"\\u").parse(), + CHECK_THROWS_WITH(parse_string("\"\\u").parse(), "[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u'"); - CHECK_THROWS_WITH(json::parser("\"\\u0").parse(), + CHECK_THROWS_WITH(parse_string("\"\\u0").parse(), "[json.exception.parse_error.101] parse error at 5: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u0'"); - CHECK_THROWS_WITH(json::parser("\"\\u01").parse(), + CHECK_THROWS_WITH(parse_string("\"\\u01").parse(), "[json.exception.parse_error.101] parse error at 6: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u01'"); - CHECK_THROWS_WITH(json::parser("\"\\u012").parse(), + CHECK_THROWS_WITH(parse_string("\"\\u012").parse(), "[json.exception.parse_error.101] parse error at 7: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u012'"); // invalid escapes @@ -511,7 +518,7 @@ TEST_CASE("parser class") case ('r'): case ('t'): { - CHECK_NOTHROW(json::parser(s.c_str()).parse()); + CHECK_NOTHROW(parse_string(s.c_str()).parse()); break; } @@ -524,11 +531,11 @@ 
TEST_CASE("parser class") // any other combination of backslash and character is invalid default: { - CHECK_THROWS_AS(json::parser(s.c_str()).parse(), json::parse_error); + CHECK_THROWS_AS(parse_string(s.c_str()).parse(), json::parse_error); // only check error message if c is not a control character if (c > 0x1f) { - CHECK_THROWS_WITH(json::parser(s.c_str()).parse(), + CHECK_THROWS_WITH(parse_string(s.c_str()).parse(), "[json.exception.parse_error.101] parse error at 3: syntax error - invalid string: forbidden character after backspace; last read '\"\\" + std::string(1, static_cast(c)) + "'"); } break; @@ -589,49 +596,49 @@ TEST_CASE("parser class") if (valid(c)) { CAPTURE(s1); - CHECK_NOTHROW(json::parser(s1.c_str()).parse()); + CHECK_NOTHROW(parse_string(s1.c_str()).parse()); CAPTURE(s2); - CHECK_NOTHROW(json::parser(s2.c_str()).parse()); + CHECK_NOTHROW(parse_string(s2.c_str()).parse()); CAPTURE(s3); - CHECK_NOTHROW(json::parser(s3.c_str()).parse()); + CHECK_NOTHROW(parse_string(s3.c_str()).parse()); CAPTURE(s4); - CHECK_NOTHROW(json::parser(s4.c_str()).parse()); + CHECK_NOTHROW(parse_string(s4.c_str()).parse()); } else { CAPTURE(s1); - CHECK_THROWS_AS(json::parser(s1.c_str()).parse(), json::parse_error); + CHECK_THROWS_AS(parse_string(s1.c_str()).parse(), json::parse_error); // only check error message if c is not a control character if (c > 0x1f) { - CHECK_THROWS_WITH(json::parser(s1.c_str()).parse(), + CHECK_THROWS_WITH(parse_string(s1.c_str()).parse(), "[json.exception.parse_error.101] parse error at 7: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s1.substr(0, 7) + "'"); } CAPTURE(s2); - CHECK_THROWS_AS(json::parser(s2.c_str()).parse(), json::parse_error); + CHECK_THROWS_AS(parse_string(s2.c_str()).parse(), json::parse_error); // only check error message if c is not a control character if (c > 0x1f) { - CHECK_THROWS_WITH(json::parser(s2.c_str()).parse(), + CHECK_THROWS_WITH(parse_string(s2.c_str()).parse(), 
"[json.exception.parse_error.101] parse error at 6: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s2.substr(0, 6) + "'"); } CAPTURE(s3); - CHECK_THROWS_AS(json::parser(s3.c_str()).parse(), json::parse_error); + CHECK_THROWS_AS(parse_string(s3.c_str()).parse(), json::parse_error); // only check error message if c is not a control character if (c > 0x1f) { - CHECK_THROWS_WITH(json::parser(s3.c_str()).parse(), + CHECK_THROWS_WITH(parse_string(s3.c_str()).parse(), "[json.exception.parse_error.101] parse error at 5: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s3.substr(0, 5) + "'"); } CAPTURE(s4); - CHECK_THROWS_AS(json::parser(s4.c_str()).parse(), json::parse_error); + CHECK_THROWS_AS(parse_string(s4.c_str()).parse(), json::parse_error); // only check error message if c is not a control character if (c > 0x1f) { - CHECK_THROWS_WITH(json::parser(s4.c_str()).parse(), + CHECK_THROWS_WITH(parse_string(s4.c_str()).parse(), "[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s4.substr(0, 4) + "'"); } } @@ -657,12 +664,12 @@ TEST_CASE("parser class") SECTION("tests found by mutate++") { // test case to make sure no comma preceeds the first key - CHECK_THROWS_AS(json::parser("{,\"key\": false}").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("{,\"key\": false}").parse(), + CHECK_THROWS_AS(parse_string("{,\"key\": false}").parse(), json::parse_error); + CHECK_THROWS_WITH(parse_string("{,\"key\": false}").parse(), "[json.exception.parse_error.101] parse error at 2: syntax error - unexpected ','; expected string literal"); // test case to make sure an object is properly closed - CHECK_THROWS_AS(json::parser("[{\"key\": false true]").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("[{\"key\": false true]").parse(), + CHECK_THROWS_AS(parse_string("[{\"key\": false true]").parse(), 
json::parse_error); + CHECK_THROWS_WITH(parse_string("[{\"key\": false true]").parse(), "[json.exception.parse_error.101] parse error at 19: syntax error - unexpected true literal; expected '}'"); // test case to make sure the callback is properly evaluated after reading a key @@ -850,42 +857,42 @@ TEST_CASE("parser class") SECTION("from std::vector") { std::vector v = {'t', 'r', 'u', 'e'}; - CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true)); + CHECK(json::parser(json::input_adapter::create(std::begin(v), std::end(v))).parse() == json(true)); } SECTION("from std::array") { std::array v { {'t', 'r', 'u', 'e'} }; - CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true)); + CHECK(json::parser(json::input_adapter::create(std::begin(v), std::end(v))).parse() == json(true)); } SECTION("from array") { uint8_t v[] = {'t', 'r', 'u', 'e'}; - CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true)); + CHECK(json::parser(json::input_adapter::create(std::begin(v), std::end(v))).parse() == json(true)); } SECTION("from char literal") { - CHECK(json::parser("true").parse() == json(true)); + CHECK(parse_string("true").parse() == json(true)); } SECTION("from std::string") { std::string v = {'t', 'r', 'u', 'e'}; - CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true)); + CHECK(json::parser(json::input_adapter::create(std::begin(v), std::end(v))).parse() == json(true)); } SECTION("from std::initializer_list") { std::initializer_list v = {'t', 'r', 'u', 'e'}; - CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true)); + CHECK(json::parser(json::input_adapter::create(std::begin(v), std::end(v))).parse() == json(true)); } SECTION("from std::valarray") { std::valarray v = {'t', 'r', 'u', 'e'}; - CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true)); + CHECK(json::parser(json::input_adapter::create(std::begin(v), std::end(v))).parse() == json(true)); } } } From 
f3e43d7c6fcfc5f8c8b08020a13b9a44f431416d Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 9 Apr 2017 21:14:51 +0200 Subject: [PATCH 32/44] :hammer: some cleanup --- src/json.hpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 20c5b2bc..f50650df 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -8626,6 +8626,9 @@ class basic_json } }; + // a type to simplify interfaces + using input_adapter_t = std::shared_ptr; + /// input adapter for cached stream input class cached_input_stream_adapter : public input_adapter { @@ -8783,8 +8786,8 @@ class basic_json class binary_reader { public: - explicit binary_reader(std::shared_ptr a) - : ia(a), is_little_endian(little_endianess()) + explicit binary_reader(input_adapter_t adapter) + : ia(adapter), is_little_endian(little_endianess()) {} /*! @@ -9807,7 +9810,7 @@ class basic_json private: /// input adapter - std::shared_ptr ia = nullptr; + input_adapter_t ia = nullptr; /// the current character int current = std::char_traits::eof(); @@ -10761,8 +10764,8 @@ class basic_json } } - explicit lexer(std::shared_ptr a) - : ia(a), decimal_point_char(get_decimal_point()) + explicit lexer(input_adapter_t adapter) + : ia(adapter), decimal_point_char(get_decimal_point()) {} private: @@ -12118,7 +12121,7 @@ scan_number_done: private: /// input adapter - std::shared_ptr ia = nullptr; + input_adapter_t ia = nullptr; /// the current character int current = std::char_traits::eof(); @@ -12157,9 +12160,9 @@ scan_number_done: { public: /// a parser reading from an input adapter - explicit parser(std::shared_ptr ia, + explicit parser(input_adapter_t adapter, const parser_callback_t cb = nullptr) - : callback(cb), m_lexer(ia) + : callback(cb), m_lexer(adapter) {} /*! 
From d7e57e3b737b4d49cbfd52c064940809bc32821c Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 9 Apr 2017 22:13:26 +0200 Subject: [PATCH 33/44] :hammer: added an output adapter for the binary writer --- src/json.hpp | 198 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 117 insertions(+), 81 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index f50650df..eab01519 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -8775,6 +8775,46 @@ class basic_json const char* start; }; + ///////////////////// + // output adapters // + ///////////////////// + + class output_adapter + { + public: + virtual void write_character(uint8_t c) = 0; + virtual void write_characters(const uint8_t* s, size_t length) = 0; + virtual ~output_adapter() {} + + static std::shared_ptr create(std::vector& vec) + { + return std::shared_ptr(new output_vector_adapter(vec)); + } + }; + + using output_adapter_t = std::shared_ptr; + + class output_vector_adapter : public output_adapter + { + public: + output_vector_adapter(std::vector& vec) + : v(vec) + {} + + void write_character(uint8_t c) override + { + v.push_back(c); + } + + void write_characters(const uint8_t* s, size_t length) override + { + std::copy(s, s + length, std::back_inserter(v)); + } + + private: + std::vector& v; + }; + ////////////////////////////////////////// // binary serialization/deserialization // ////////////////////////////////////////// @@ -9829,32 +9869,23 @@ class basic_json : is_little_endian(little_endianess()) {} - std::vector write_cbor(const basic_json& j) - { - write_cbor_internal(j); - return v; - } + explicit binary_writer(output_adapter_t adapter) + : is_little_endian(little_endianess()), oa(adapter) + {} - std::vector write_msgpack(const basic_json& j) - { - write_msgpack_internal(j); - return v; - } - - private: - void write_cbor_internal(const basic_json& j) + void write_cbor(const basic_json& j) { switch (j.type()) { case value_t::null: { - v.push_back(0xf6); + 
oa->write_character(0xf6); break; } case value_t::boolean: { - v.push_back(j.m_value.boolean ? 0xf5 : 0xf4); + oa->write_character(j.m_value.boolean ? 0xf5 : 0xf4); break; } @@ -9871,22 +9902,22 @@ class basic_json } else if (j.m_value.number_integer <= (std::numeric_limits::max)()) { - v.push_back(0x18); + oa->write_character(0x18); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_integer <= (std::numeric_limits::max)()) { - v.push_back(0x19); + oa->write_character(0x19); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_integer <= (std::numeric_limits::max)()) { - v.push_back(0x1a); + oa->write_character(0x1a); write_number(static_cast(j.m_value.number_integer)); } else { - v.push_back(0x1b); + oa->write_character(0x1b); write_number(static_cast(j.m_value.number_integer)); } } @@ -9901,22 +9932,22 @@ class basic_json } else if (positive_number <= (std::numeric_limits::max)()) { - v.push_back(0x38); + oa->write_character(0x38); write_number(static_cast(positive_number)); } else if (positive_number <= (std::numeric_limits::max)()) { - v.push_back(0x39); + oa->write_character(0x39); write_number(static_cast(positive_number)); } else if (positive_number <= (std::numeric_limits::max)()) { - v.push_back(0x3a); + oa->write_character(0x3a); write_number(static_cast(positive_number)); } else { - v.push_back(0x3b); + oa->write_character(0x3b); write_number(static_cast(positive_number)); } } @@ -9931,22 +9962,22 @@ class basic_json } else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { - v.push_back(0x18); + oa->write_character(0x18); write_number(static_cast(j.m_value.number_unsigned)); } else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { - v.push_back(0x19); + oa->write_character(0x19); write_number(static_cast(j.m_value.number_unsigned)); } else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { - v.push_back(0x1a); + oa->write_character(0x1a); 
write_number(static_cast(j.m_value.number_unsigned)); } else { - v.push_back(0x1b); + oa->write_character(0x1b); write_number(static_cast(j.m_value.number_unsigned)); } break; @@ -9955,7 +9986,7 @@ class basic_json case value_t::number_float: { // Double-Precision Float - v.push_back(0xfb); + oa->write_character(0xfb); write_number(j.m_value.number_float); break; } @@ -9969,30 +10000,30 @@ class basic_json } else if (N <= 0xff) { - v.push_back(0x78); + oa->write_character(0x78); write_number(static_cast(N)); } else if (N <= 0xffff) { - v.push_back(0x79); + oa->write_character(0x79); write_number(static_cast(N)); } else if (N <= 0xffffffff) { - v.push_back(0x7a); + oa->write_character(0x7a); write_number(static_cast(N)); } // LCOV_EXCL_START else if (N <= 0xffffffffffffffff) { - v.push_back(0x7b); + oa->write_character(0x7b); write_number(static_cast(N)); } // LCOV_EXCL_STOP // append string - std::copy(j.m_value.string->begin(), j.m_value.string->end(), - std::back_inserter(v)); + oa->write_characters(reinterpret_cast(j.m_value.string->c_str()), + j.m_value.string->size()); break; } @@ -10005,23 +10036,23 @@ class basic_json } else if (N <= 0xff) { - v.push_back(0x98); + oa->write_character(0x98); write_number(static_cast(N)); } else if (N <= 0xffff) { - v.push_back(0x99); + oa->write_character(0x99); write_number(static_cast(N)); } else if (N <= 0xffffffff) { - v.push_back(0x9a); + oa->write_character(0x9a); write_number(static_cast(N)); } // LCOV_EXCL_START else if (N <= 0xffffffffffffffff) { - v.push_back(0x9b); + oa->write_character(0x9b); write_number(static_cast(N)); } // LCOV_EXCL_STOP @@ -10029,7 +10060,7 @@ class basic_json // append each element for (const auto& el : *j.m_value.array) { - write_cbor_internal(el); + write_cbor(el); } break; } @@ -10043,23 +10074,23 @@ class basic_json } else if (N <= 0xff) { - v.push_back(0xb8); + oa->write_character(0xb8); write_number(static_cast(N)); } else if (N <= 0xffff) { - v.push_back(0xb9); + 
oa->write_character(0xb9); write_number(static_cast(N)); } else if (N <= 0xffffffff) { - v.push_back(0xba); + oa->write_character(0xba); write_number(static_cast(N)); } // LCOV_EXCL_START else if (N <= 0xffffffffffffffff) { - v.push_back(0xbb); + oa->write_character(0xbb); write_number(static_cast(N)); } // LCOV_EXCL_STOP @@ -10067,8 +10098,8 @@ class basic_json // append each element for (const auto& el : *j.m_value.object) { - write_cbor_internal(el.first); - write_cbor_internal(el.second); + write_cbor(el.first); + write_cbor(el.second); } break; } @@ -10080,21 +10111,21 @@ class basic_json } } - void write_msgpack_internal(const basic_json& j) + void write_msgpack(const basic_json& j) { switch (j.type()) { case value_t::null: { // nil - v.push_back(0xc0); + oa->write_character(0xc0); break; } case value_t::boolean: { // true and false - v.push_back(j.m_value.boolean ? 0xc3 : 0xc2); + oa->write_character(j.m_value.boolean ? 0xc3 : 0xc2); break; } @@ -10114,25 +10145,25 @@ class basic_json else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { // uint 8 - v.push_back(0xcc); + oa->write_character(0xcc); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { // uint 16 - v.push_back(0xcd); + oa->write_character(0xcd); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { // uint 32 - v.push_back(0xce); + oa->write_character(0xce); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { // uint 64 - v.push_back(0xcf); + oa->write_character(0xcf); write_number(static_cast(j.m_value.number_integer)); } } @@ -10146,25 +10177,25 @@ class basic_json else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) { // int 8 - v.push_back(0xd0); + oa->write_character(0xd0); 
write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) { // int 16 - v.push_back(0xd1); + oa->write_character(0xd1); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) { // int 32 - v.push_back(0xd2); + oa->write_character(0xd2); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) { // int 64 - v.push_back(0xd3); + oa->write_character(0xd3); write_number(static_cast(j.m_value.number_integer)); } } @@ -10181,25 +10212,25 @@ class basic_json else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { // uint 8 - v.push_back(0xcc); + oa->write_character(0xcc); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { // uint 16 - v.push_back(0xcd); + oa->write_character(0xcd); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { // uint 32 - v.push_back(0xce); + oa->write_character(0xce); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { // uint 64 - v.push_back(0xcf); + oa->write_character(0xcf); write_number(static_cast(j.m_value.number_integer)); } break; @@ -10208,7 +10239,7 @@ class basic_json case value_t::number_float: { // float 64 - v.push_back(0xcb); + oa->write_character(0xcb); write_number(j.m_value.number_float); break; } @@ -10224,25 +10255,25 @@ class basic_json else if (N <= 255) { // str 8 - v.push_back(0xd9); + oa->write_character(0xd9); write_number(static_cast(N)); } else if (N <= 65535) { // str 16 - v.push_back(0xda); + 
oa->write_character(0xda); write_number(static_cast(N)); } else if (N <= 4294967295) { // str 32 - v.push_back(0xdb); + oa->write_character(0xdb); write_number(static_cast(N)); } // append string - std::copy(j.m_value.string->begin(), j.m_value.string->end(), - std::back_inserter(v)); + oa->write_characters(reinterpret_cast(j.m_value.string->c_str()), + j.m_value.string->size()); break; } @@ -10257,20 +10288,20 @@ class basic_json else if (N <= 0xffff) { // array 16 - v.push_back(0xdc); + oa->write_character(0xdc); write_number(static_cast(N)); } else if (N <= 0xffffffff) { // array 32 - v.push_back(0xdd); + oa->write_character(0xdd); write_number(static_cast(N)); } // append each element for (const auto& el : *j.m_value.array) { - write_msgpack_internal(el); + write_msgpack(el); } break; } @@ -10286,21 +10317,21 @@ class basic_json else if (N <= 65535) { // map 16 - v.push_back(0xde); + oa->write_character(0xde); write_number(static_cast(N)); } else if (N <= 4294967295) { // map 32 - v.push_back(0xdf); + oa->write_character(0xdf); write_number(static_cast(N)); } // append each element for (const auto& el : *j.m_value.object) { - write_msgpack_internal(el.first); - write_msgpack_internal(el.second); + write_msgpack(el.first); + write_msgpack(el.second); } break; } @@ -10312,6 +10343,7 @@ class basic_json } } + private: template void write_number(T n) { @@ -10323,11 +10355,11 @@ class basic_json // reverse byte order prior to conversion if necessary if (is_little_endian) { - v.push_back(vec[sizeof(T) - i - 1]); + oa->write_character(vec[sizeof(T) - i - 1]); } else { - v.push_back(vec[i]); + oa->write_character(vec[i]); } } } @@ -10343,8 +10375,8 @@ class basic_json /// whether we can assume little endianess const bool is_little_endian = true; - /// the vector that is used as output - std::vector v {}; + /// the output + output_adapter_t oa = nullptr; }; public: @@ -10432,8 +10464,10 @@ class basic_json */ static std::vector to_cbor(const basic_json& j) { - 
binary_writer bw; - return bw.write_cbor(j); + std::vector result; + binary_writer bw(output_adapter::create(result)); + bw.write_cbor(j); + return result; } /*! @@ -10512,8 +10546,10 @@ class basic_json */ static std::vector to_msgpack(const basic_json& j) { - binary_writer bw; - return bw.write_msgpack(j); + std::vector result; + binary_writer bw(output_adapter::create(result)); + bw.write_msgpack(j); + return result; } /*! From 717106ecedb9cc42882a3964a5afc4016dae6382 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Fri, 14 Apr 2017 19:49:05 +0200 Subject: [PATCH 34/44] :hammer: templated output_adapter and used in class serializer --- src/json.hpp | 248 +++++++++++++++++++++------------- test/src/unit-convenience.cpp | 2 +- 2 files changed, 154 insertions(+), 96 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 2fcb368e..f7fbfd71 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -2897,8 +2897,8 @@ class basic_json */ string_t dump(const int indent = -1) const { - std::stringstream ss; - serializer s(ss); + string_t result; + serializer s(output_adapter::create(result)); if (indent >= 0) { @@ -2909,7 +2909,7 @@ class basic_json s.dump(*this, false, 0); } - return ss.str(); + return result; } /*! 
@@ -6554,6 +6554,104 @@ class basic_json /// @} + private: + ///////////////////// + // output adapters // + ///////////////////// + + template + class output_adapter + { + public: + virtual void write_character(CharType c) = 0; + virtual void write_characters(const CharType* s, size_t length) = 0; + virtual ~output_adapter() {} + + static std::shared_ptr> create(std::vector& vec) + { + return std::shared_ptr(new output_vector_adapter(vec)); + } + + static std::shared_ptr> create(std::ostream& s) + { + return std::shared_ptr(new output_stream_adapter(s)); + } + + static std::shared_ptr> create(std::string& s) + { + return std::shared_ptr(new output_string_adapter(s)); + } + }; + + template + using output_adapter_t = std::shared_ptr>; + + template + class output_vector_adapter : public output_adapter + { + public: + output_vector_adapter(std::vector& vec) + : v(vec) + {} + + void write_character(CharType c) override + { + v.push_back(c); + } + + void write_characters(const CharType* s, size_t length) override + { + std::copy(s, s + length, std::back_inserter(v)); + } + + private: + std::vector& v; + }; + + template + class output_stream_adapter : public output_adapter + { + public: + output_stream_adapter(std::basic_ostream& s) + : stream(s) + {} + + void write_character(CharType c) override + { + stream.put(c); + } + + void write_characters(const CharType* s, size_t length) override + { + stream.write(s, static_cast(length)); + } + + private: + std::basic_ostream& stream; + }; + + template + class output_string_adapter : public output_adapter + { + public: + output_string_adapter(std::string& s) + : str(s) + {} + + void write_character(CharType c) override + { + str.push_back(c); + } + + void write_characters(const CharType* s, size_t length) override + { + str.append(s, length); + } + + private: + std::basic_string& str; + }; + /////////////////// // serialization // @@ -6576,7 +6674,7 @@ class basic_json /*! 
@param[in] s output stream to serialize to */ - serializer(std::ostream& s) + serializer(output_adapter_t s) : o(s), loc(std::localeconv()), thousands_sep(!loc->thousands_sep ? '\0' : loc->thousands_sep[0]), decimal_point(!loc->decimal_point ? '\0' : loc->decimal_point[0]) @@ -6610,13 +6708,13 @@ class basic_json { if (val.m_value.object->empty()) { - o.write("{}", 2); + o->write_characters("{}", 2); return; } if (pretty_print) { - o.write("{\n", 2); + o->write_characters("{\n", 2); // variable to hold indentation for recursive calls const auto new_indent = current_indent + indent_step; @@ -6629,49 +6727,49 @@ class basic_json auto i = val.m_value.object->cbegin(); for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) { - o.write(indent_string.c_str(), static_cast(new_indent)); - o.put('\"'); + o->write_characters(indent_string.c_str(), new_indent); + o->write_character('\"'); dump_escaped(i->first); - o.write("\": ", 3); + o->write_characters("\": ", 3); dump(i->second, true, indent_step, new_indent); - o.write(",\n", 2); + o->write_characters(",\n", 2); } // last element assert(i != val.m_value.object->cend()); - o.write(indent_string.c_str(), static_cast(new_indent)); - o.put('\"'); + o->write_characters(indent_string.c_str(), new_indent); + o->write_character('\"'); dump_escaped(i->first); - o.write("\": ", 3); + o->write_characters("\": ", 3); dump(i->second, true, indent_step, new_indent); - o.put('\n'); - o.write(indent_string.c_str(), static_cast(current_indent)); - o.put('}'); + o->write_character('\n'); + o->write_characters(indent_string.c_str(), current_indent); + o->write_character('}'); } else { - o.put('{'); + o->write_character('{'); // first n-1 elements auto i = val.m_value.object->cbegin(); for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) { - o.put('\"'); + o->write_character('\"'); dump_escaped(i->first); - o.write("\":", 2); + o->write_characters("\":", 2); dump(i->second, false, indent_step, 
current_indent); - o.put(','); + o->write_character(','); } // last element assert(i != val.m_value.object->cend()); - o.put('\"'); + o->write_character('\"'); dump_escaped(i->first); - o.write("\":", 2); + o->write_characters("\":", 2); dump(i->second, false, indent_step, current_indent); - o.put('}'); + o->write_character('}'); } return; @@ -6681,13 +6779,13 @@ class basic_json { if (val.m_value.array->empty()) { - o.write("[]", 2); + o->write_characters("[]", 2); return; } if (pretty_print) { - o.write("[\n", 2); + o->write_characters("[\n", 2); // variable to hold indentation for recursive calls const auto new_indent = current_indent + indent_step; @@ -6699,36 +6797,36 @@ class basic_json // first n-1 elements for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i) { - o.write(indent_string.c_str(), static_cast(new_indent)); + o->write_characters(indent_string.c_str(), new_indent); dump(*i, true, indent_step, new_indent); - o.write(",\n", 2); + o->write_characters(",\n", 2); } // last element assert(not val.m_value.array->empty()); - o.write(indent_string.c_str(), static_cast(new_indent)); + o->write_characters(indent_string.c_str(), new_indent); dump(val.m_value.array->back(), true, indent_step, new_indent); - o.put('\n'); - o.write(indent_string.c_str(), static_cast(current_indent)); - o.put(']'); + o->write_character('\n'); + o->write_characters(indent_string.c_str(), current_indent); + o->write_character(']'); } else { - o.put('['); + o->write_character('['); // first n-1 elements for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i) { dump(*i, false, indent_step, current_indent); - o.put(','); + o->write_character(','); } // last element assert(not val.m_value.array->empty()); dump(val.m_value.array->back(), false, indent_step, current_indent); - o.put(']'); + o->write_character(']'); } return; @@ -6736,9 +6834,9 @@ class basic_json case value_t::string: { - o.put('\"'); + o->write_character('\"'); 
dump_escaped(*val.m_value.string); - o.put('\"'); + o->write_character('\"'); return; } @@ -6746,11 +6844,11 @@ class basic_json { if (val.m_value.boolean) { - o.write("true", 4); + o->write_characters("true", 4); } else { - o.write("false", 5); + o->write_characters("false", 5); } return; } @@ -6775,13 +6873,13 @@ class basic_json case value_t::discarded: { - o.write("", 11); + o->write_characters("", 11); return; } case value_t::null: { - o.write("null", 4); + o->write_characters("null", 4); return; } } @@ -6872,7 +6970,7 @@ class basic_json const auto space = extra_space(s); if (space == 0) { - o.write(s.c_str(), static_cast(s.size())); + o->write_characters(s.c_str(), s.size()); return; } @@ -6998,7 +7096,7 @@ class basic_json } assert(pos == s.size() + space); - o.write(result.c_str(), static_cast(result.size())); + o->write_characters(result.c_str(), result.size()); } /*! @@ -7018,7 +7116,7 @@ class basic_json // special case for "0" if (x == 0) { - o.put('0'); + o->write_character('0'); return; } @@ -7044,7 +7142,7 @@ class basic_json } std::reverse(number_buffer.begin(), number_buffer.begin() + i); - o.write(number_buffer.data(), static_cast(i)); + o->write_characters(number_buffer.data(), i); } /*! 
@@ -7060,7 +7158,7 @@ class basic_json // NaN / inf if (not std::isfinite(x) or std::isnan(x)) { - o.write("null", 4); + o->write_characters("null", 4); return; } @@ -7069,11 +7167,11 @@ class basic_json { if (std::signbit(x)) { - o.write("-0.0", 4); + o->write_characters("-0.0", 4); } else { - o.write("0.0", 3); + o->write_characters("0.0", 3); } return; } @@ -7114,7 +7212,7 @@ class basic_json } } - o.write(number_buffer.data(), static_cast(len)); + o->write_characters(number_buffer.data(), static_cast(len)); // determine if need to append ".0" const bool value_is_int_like = std::none_of(number_buffer.begin(), @@ -7126,13 +7224,13 @@ class basic_json if (value_is_int_like) { - o.write(".0", 2); + o->write_characters(".0", 2); } } private: /// the output of the serializer - std::ostream& o; + output_adapter_t o = nullptr; /// a (hopefully) large enough character buffer std::array number_buffer{{}}; @@ -7181,7 +7279,7 @@ class basic_json o.width(0); // do the actual serialization - serializer s(o); + serializer s(output_adapter::create(o)); s.dump(j, pretty_print, static_cast(indentation)); return o; } @@ -8778,46 +8876,6 @@ class basic_json const char* start; }; - ///////////////////// - // output adapters // - ///////////////////// - - class output_adapter - { - public: - virtual void write_character(uint8_t c) = 0; - virtual void write_characters(const uint8_t* s, size_t length) = 0; - virtual ~output_adapter() {} - - static std::shared_ptr create(std::vector& vec) - { - return std::shared_ptr(new output_vector_adapter(vec)); - } - }; - - using output_adapter_t = std::shared_ptr; - - class output_vector_adapter : public output_adapter - { - public: - output_vector_adapter(std::vector& vec) - : v(vec) - {} - - void write_character(uint8_t c) override - { - v.push_back(c); - } - - void write_characters(const uint8_t* s, size_t length) override - { - std::copy(s, s + length, std::back_inserter(v)); - } - - private: - std::vector& v; - }; - 
////////////////////////////////////////// // binary serialization/deserialization // ////////////////////////////////////////// @@ -9872,7 +9930,7 @@ class basic_json : is_little_endian(little_endianess()) {} - explicit binary_writer(output_adapter_t adapter) + explicit binary_writer(output_adapter_t adapter) : is_little_endian(little_endianess()), oa(adapter) {} @@ -10379,7 +10437,7 @@ class basic_json const bool is_little_endian = true; /// the output - output_adapter_t oa = nullptr; + output_adapter_t oa = nullptr; }; public: @@ -10468,7 +10526,7 @@ class basic_json static std::vector to_cbor(const basic_json& j) { std::vector result; - binary_writer bw(output_adapter::create(result)); + binary_writer bw(output_adapter::create(result)); bw.write_cbor(j); return result; } @@ -10550,7 +10608,7 @@ class basic_json static std::vector to_msgpack(const basic_json& j) { std::vector result; - binary_writer bw(output_adapter::create(result)); + binary_writer bw(output_adapter::create(result)); bw.write_msgpack(j); return result; } diff --git a/test/src/unit-convenience.cpp b/test/src/unit-convenience.cpp index 33556311..5e16962d 100644 --- a/test/src/unit-convenience.cpp +++ b/test/src/unit-convenience.cpp @@ -53,7 +53,7 @@ TEST_CASE("convenience functions") const char* escaped) { std::stringstream ss; - json::serializer s(ss); + json::serializer s(json::output_adapter::create(ss)); s.dump_escaped(original); CHECK(ss.str() == escaped); }; From db9bf953f310014835e3bf5108877e1c3d2b25f7 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sat, 15 Apr 2017 10:40:10 +0200 Subject: [PATCH 35/44] :hammer: improved diagnostic output --- src/json.hpp | 51 +++++++++++++++++++++------------- test/src/unit-class_parser.cpp | 16 +++++------ 2 files changed, 39 insertions(+), 28 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index f7fbfd71..82f69abe 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -11127,6 +11127,13 @@ class basic_json return codepoint; } + static std::string 
codepoint_to_string(int codepoint) + { + std::stringstream ss; + ss << "U+" << std::setw(4) << std::uppercase << std::setfill('0') << std::hex << codepoint; + return ss.str(); + } + token_type scan_string() { // reset yytext (ignore opening quote) @@ -11237,13 +11244,13 @@ class basic_json } else { - error_message = "invalid string: invalid low surrogate"; + error_message = "invalid string: surrogate " + codepoint_to_string(codepoint1) + " must be followed by U+DC00..U+DFFF instead of " + codepoint_to_string(codepoint2); return token_type::parse_error; } } else { - error_message = "invalid string: missing low surrogate"; + error_message = "invalid string: surrogate " + codepoint_to_string(codepoint1) + " must be followed by U+DC00..U+DFFF"; return token_type::parse_error; } } @@ -11251,7 +11258,7 @@ class basic_json { if (JSON_UNLIKELY(0xDC00 <= codepoint1 and codepoint1 <= 0xDFFF)) { - error_message = "invalid string: missing high surrogate"; + error_message = "invalid string: surrogate " + codepoint_to_string(codepoint1) + " must follow U+D800..U+DBFF"; return token_type::parse_error; } @@ -11336,7 +11343,7 @@ class basic_json case 0x1e: case 0x1f: { - error_message = "invalid string: control characters (U+0000 through U+001f) must be escaped"; + error_message = "invalid string: control character " + codepoint_to_string(current) + " must be escaped"; return token_type::parse_error; } @@ -11480,7 +11487,7 @@ class basic_json continue; } - error_message = "invalid string: not well-formed UTF-8 byte"; + error_message = "invalid string: ill-formed UTF-8 byte"; return token_type::parse_error; } @@ -11500,7 +11507,7 @@ class basic_json } } - error_message = "invalid string: not well-formed UTF-8 byte"; + error_message = "invalid string: ill-formed UTF-8 byte"; return token_type::parse_error; } @@ -11534,7 +11541,7 @@ class basic_json } } - error_message = "invalid string: not well-formed UTF-8 byte"; + error_message = "invalid string: ill-formed UTF-8 byte"; return 
token_type::parse_error; } @@ -11554,7 +11561,7 @@ class basic_json } } - error_message = "invalid string: not well-formed UTF-8 byte"; + error_message = "invalid string: ill-formed UTF-8 byte"; return token_type::parse_error; } @@ -11579,7 +11586,7 @@ class basic_json } } - error_message = "invalid string: not well-formed UTF-8 byte"; + error_message = "invalid string: ill-formed UTF-8 byte"; return token_type::parse_error; } @@ -11606,7 +11613,7 @@ class basic_json } } - error_message = "invalid string: not well-formed UTF-8 byte"; + error_message = "invalid string: ill-formed UTF-8 byte"; return token_type::parse_error; } @@ -11631,14 +11638,14 @@ class basic_json } } - error_message = "invalid string: not well-formed UTF-8 byte"; + error_message = "invalid string: ill-formed UTF-8 byte"; return token_type::parse_error; } - // remaining bytes (80..C1 and F5..FF) are not well-formed + // remaining bytes (80..C1 and F5..FF) are ill-formed default: { - error_message = "invalid string: not well-formed UTF-8 byte"; + error_message = "invalid string: ill-formed UTF-8 byte"; return token_type::parse_error; } } @@ -11681,7 +11688,7 @@ class basic_json // be changed if minus sign, decimal point or exponent is read token_type number_type = token_type::value_unsigned; - // state: we just found out we need to scan a number + // state (init): we just found out we need to scan a number switch (current) { case '-': @@ -12001,6 +12008,8 @@ scan_number_done: } } + // this code is reached if we parse a floating-point number or if + // an integer conversion above failed strtof(value_float, yytext.data(), nullptr); return token_type::value_float; } @@ -12064,7 +12073,8 @@ scan_number_done: /// add a character to yytext void add(int c) { - // resize yytext if necessary + // resize yytext if necessary; this condition is deemed unlikely, + // because we start with a 1024-byte buffer if (JSON_UNLIKELY((yylen + 1 > yytext.capacity()))) { yytext.resize(2 * yytext.capacity(), '\0'); @@ 
-12120,7 +12130,7 @@ scan_number_done: std::string s = ia->read(start_pos, chars_read - start_pos); // escape control characters - std::stringstream ss; + std::string result; for (auto c : s) { if (c == '\0' or c == std::char_traits::eof()) @@ -12131,16 +12141,16 @@ scan_number_done: else if ('\x00' <= c and c <= '\x1f') { // escape control characters - ss << ""; + result += "<" + codepoint_to_string(c) + ">"; } else { // add character as is - ss << c; + result.append(1, c); } } - return ss.str(); + return result; } /// return syntax error message @@ -12204,7 +12214,8 @@ scan_number_done: case '9': return scan_number(); - // end of input + // end of input (the null byte is needed when parsing from + // string literals) case '\0': case std::char_traits::eof(): return token_type::end_of_input; diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index e0fffac4..b631a978 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -98,18 +98,18 @@ TEST_CASE("parser class") // error: tab in string CHECK_THROWS_AS(parse_string("\"\t\"").parse(), json::parse_error); CHECK_THROWS_WITH(parse_string("\"\t\"").parse(), - "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character U+0009 must be escaped; last read '\"'"); // error: newline in string CHECK_THROWS_AS(parse_string("\"\n\"").parse(), json::parse_error); CHECK_THROWS_AS(parse_string("\"\r\"").parse(), json::parse_error); CHECK_THROWS_WITH(parse_string("\"\n\"").parse(), - "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character U+000A must be escaped; last read 
'\"'"); CHECK_THROWS_WITH(parse_string("\"\r\"").parse(), - "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character U+000D must be escaped; last read '\"'"); // error: backspace in string CHECK_THROWS_AS(parse_string("\"\b\"").parse(), json::parse_error); CHECK_THROWS_WITH(parse_string("\"\b\"").parse(), - "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character U+0008 must be escaped; last read '\"'"); // improve code coverage CHECK_THROWS_AS(parse_string("\uFF01").parse(), json::parse_error); CHECK_THROWS_AS(parse_string("[-4:1,]").parse(), json::parse_error); @@ -648,17 +648,17 @@ TEST_CASE("parser class") // missing part of a surrogate pair CHECK_THROWS_AS(json::parse("\"\\uD80C\""), json::parse_error); CHECK_THROWS_WITH(json::parse("\"\\uD80C\""), - "[json.exception.parse_error.101] parse error at 8: syntax error - invalid string: missing low surrogate; last read '\"\\uD80C\"'"); + "[json.exception.parse_error.101] parse error at 8: syntax error - invalid string: surrogate U+D80C must be followed by U+DC00..U+DFFF; last read '\"\\uD80C\"'"); // invalid surrogate pair CHECK_THROWS_AS(json::parse("\"\\uD80C\\uD80C\""), json::parse_error); CHECK_THROWS_AS(json::parse("\"\\uD80C\\u0000\""), json::parse_error); CHECK_THROWS_AS(json::parse("\"\\uD80C\\uFFFF\""), json::parse_error); CHECK_THROWS_WITH(json::parse("\"\\uD80C\\uD80C\""), - "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\uD80C'"); + "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: 
surrogate U+D80C must be followed by U+DC00..U+DFFF instead of U+D80C; last read '\"\\uD80C\\uD80C'"); CHECK_THROWS_WITH(json::parse("\"\\uD80C\\u0000\""), - "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\u0000'"); + "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: surrogate U+D80C must be followed by U+DC00..U+DFFF instead of U+0000; last read '\"\\uD80C\\u0000'"); CHECK_THROWS_WITH(json::parse("\"\\uD80C\\uFFFF\""), - "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\uFFFF'"); + "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: surrogate U+D80C must be followed by U+DC00..U+DFFF instead of U+FFFF; last read '\"\\uD80C\\uFFFF'"); } SECTION("tests found by mutate++") From e24df7eca98ae8590616c103cafcff9ed3fa5b7c Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 23 Apr 2017 15:10:40 +0200 Subject: [PATCH 36/44] :memo: improved documentation --- src/json.hpp | 250 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 214 insertions(+), 36 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 1db82e12..99a6e854 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -6605,6 +6605,7 @@ class basic_json // output adapters // ///////////////////// + /// abstract output adapter interface template class output_adapter { @@ -6629,9 +6630,11 @@ class basic_json } }; + /// a type to simplify interfaces template using output_adapter_t = std::shared_ptr>; + /// output adapter for byte vectors template class output_vector_adapter : public output_adapter { @@ -6654,6 +6657,7 @@ class basic_json std::vector& v; }; + /// putput adatpter for output streams template class output_stream_adapter : public output_adapter { @@ -6676,6 +6680,7 @@ class basic_json std::basic_ostream& stream; }; + /// output adapter for basic_string template class 
output_string_adapter : public output_adapter { @@ -8773,7 +8778,7 @@ class basic_json } }; - // a type to simplify interfaces + /// a type to simplify interfaces using input_adapter_t = std::shared_ptr; /// input adapter for cached stream input @@ -8930,17 +8935,34 @@ class basic_json /// @{ private: + /*! + @brief deserialization of CBOR and MessagePack values + */ class binary_reader { public: + /*! + @brief create a binary reader + + @param[in] adapter input adapter to read from + */ explicit binary_reader(input_adapter_t adapter) : ia(adapter), is_little_endian(little_endianess()) - {} + { + assert(ia); + } /*! + @brief create a JSON value from CBOR input + @param[in] get_char whether a new character should be retrieved from the input (true, default) or whether the last read character should be considered instead + + @return JSON value created from CBOR input + + @throw parse_error.110 if input ended unexpectedly + @throw parse_error.112 if unsupported byte was read */ basic_json parse_cbor(const bool get_char = true) { @@ -9297,12 +9319,13 @@ class basic_json check_eof(); // code from RFC 7049, Appendix D, Figure 3: - // As half-precision floating-point numbers were only added to - // IEEE 754 in 2008, today's programming platforms often still - // only have limited support for them. It is very easy to - // include at least decoding support for them even without such - // support. An example of a small decoder for half-precision - // floating-point numbers in the C language is shown in Fig. 3. + // As half-precision floating-point numbers were only added + // to IEEE 754 in 2008, today's programming platforms often + // still only have limited support for them. It is very + // easy to include at least decoding support for them even + // without such support. An example of a small decoder for + // half-precision floating-point numbers in the C language + // is shown in Fig. 3. 
const int half = (byte1 << 8) + byte2; const int exp = (half >> 10) & 0x1f; const int mant = half & 0x3ff; @@ -9343,6 +9366,14 @@ class basic_json } } + /*! + @brief create a JSON value from MessagePack input + + @return JSON value created from MessagePack input + + @throw parse_error.110 if input ended unexpectedly + @throw parse_error.112 if unsupported byte was read + */ basic_json parse_msgpack() { switch (get()) @@ -9745,23 +9776,52 @@ class basic_json } } - private: - // from http://stackoverflow.com/a/1001328/266378 - static bool little_endianess() + /*! + @brief determine system byte order + + @return true iff system's byte order is little endian + + @note from http://stackoverflow.com/a/1001328/266378 + */ + static bool little_endianess() noexcept { int num = 1; return (*reinterpret_cast(&num) == 1); } + private: + /*! + @brief get next character from the input + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns + `std::char_traits::eof()` in that case. + + @return character read from the input + */ int get() { ++chars_read; return (current = ia->get_character()); } + /* + @brief read a number from the input + + @tparam T the type of the number + + @return number of type @a T + + @note This function needs to respect the system's endianess, because + bytes in CBOR and MessagePack are stored in network order (big + endian) and therefore need reordering on little endian systems. + + @throw parse_error.110 if input has less than `sizeof(T)` bytes + */ template T get_number() { + // step 1: read input into array with system's byte order std::array vec; for (size_t i = 0; i < sizeof(T); ++i) { @@ -9779,11 +9839,21 @@ class basic_json } } + // step 2: convert array into number of type T and return T result; std::memcpy(&result, vec.data(), sizeof(T)); return result; } + /*! 
+ @brief create a string by reading characters from the input + + @param[in] len number of bytes to read + + @return string created by reading @a len bytes + + @throw parse_error.110 if input has less than @a len bytes + */ std::string get_string(const size_t len) { std::string result; @@ -9796,6 +9866,18 @@ class basic_json return result; } + /*! + @brief reads a CBOR string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. + Additionally, CBOR's strings with indefinite lengths are supported. + + @return string + + @throw parse_error.110 if input ended + @throw parse_error.113 if an unexpexted byte is read + */ std::string get_cbor_string() { check_eof(); @@ -9876,6 +9958,17 @@ class basic_json } } + /*! + @brief reads a MessagePack string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. + + @return string + + @throw parse_error.110 if input ended + @throw parse_error.113 if an unexpexted byte is read + */ std::string get_msgpack_string() { check_eof(); @@ -9947,7 +10040,11 @@ class basic_json } } - void check_eof() + /*! + @brief check if input ended + @throw parse_error.110 if input ended + */ + void check_eof() const { if (JSON_UNLIKELY(current == std::char_traits::eof())) { @@ -9969,17 +10066,26 @@ class basic_json const bool is_little_endian = true; }; + /*! + @brief serialization to CBOR and MessagePack values + */ class binary_writer { public: - binary_writer() - : is_little_endian(little_endianess()) - {} + /*! + @brief create a binary writer + @param[in] adapter output adapter to write to + */ explicit binary_writer(output_adapter_t adapter) - : is_little_endian(little_endianess()), oa(adapter) - {} + : is_little_endian(binary_reader::little_endianess()), oa(adapter) + { + assert(oa); + } + /*! 
+ @brief[in] j JSON value to serialize + */ void write_cbor(const basic_json& j) { switch (j.type()) @@ -10100,6 +10206,7 @@ class basic_json case value_t::string: { + // step 1: write control byte and the string length const auto N = j.m_value.string->size(); if (N <= 0x17) { @@ -10128,7 +10235,7 @@ class basic_json } // LCOV_EXCL_STOP - // append string + // step 2: write the string oa->write_characters(reinterpret_cast(j.m_value.string->c_str()), j.m_value.string->size()); break; @@ -10136,6 +10243,7 @@ class basic_json case value_t::array: { + // step 1: write control byte and the array size const auto N = j.m_value.array->size(); if (N <= 0x17) { @@ -10164,7 +10272,7 @@ class basic_json } // LCOV_EXCL_STOP - // append each element + // step 2: write each element for (const auto& el : *j.m_value.array) { write_cbor(el); @@ -10174,6 +10282,7 @@ class basic_json case value_t::object: { + // step 1: write control byte and the object size const auto N = j.m_value.object->size(); if (N <= 0x17) { @@ -10202,7 +10311,7 @@ class basic_json } // LCOV_EXCL_STOP - // append each element + // step 2: write each element for (const auto& el : *j.m_value.object) { write_cbor(el.first); @@ -10218,6 +10327,9 @@ class basic_json } } + /*! 
+ @brief[in] j JSON value to serialize + */ void write_msgpack(const basic_json& j) { switch (j.type()) @@ -10353,6 +10465,7 @@ class basic_json case value_t::string: { + // step 1: write control byte and the string length const auto N = j.m_value.string->size(); if (N <= 31) { @@ -10378,7 +10491,7 @@ class basic_json write_number(static_cast(N)); } - // append string + // step 2: write the string oa->write_characters(reinterpret_cast(j.m_value.string->c_str()), j.m_value.string->size()); break; @@ -10386,6 +10499,7 @@ class basic_json case value_t::array: { + // step 1: write control byte and the array size const auto N = j.m_value.array->size(); if (N <= 15) { @@ -10405,7 +10519,7 @@ class basic_json write_number(static_cast(N)); } - // append each element + // step 2: write each element for (const auto& el : *j.m_value.array) { write_msgpack(el); @@ -10415,6 +10529,7 @@ class basic_json case value_t::object: { + // step 1: write control byte and the object size const auto N = j.m_value.object->size(); if (N <= 15) { @@ -10434,7 +10549,7 @@ class basic_json write_number(static_cast(N)); } - // append each element + // step 2: write each element for (const auto& el : *j.m_value.object) { write_msgpack(el.first); @@ -10451,12 +10566,24 @@ class basic_json } private: + /* + @brief write a number to output input + + @param[in] n number of type @a T + @tparam T the type of the number + + @note This function needs to respect the system's endianess, because + bytes in CBOR and MessagePack are stored in network order (big + endian) and therefore need reordering on little endian systems. 
+ */ template void write_number(T n) { + // step 1: write number to array of length sizeof(T) std::array vec; std::memcpy(vec.data(), &n, sizeof(T)); + // step 2: write array to output (with possible reordering) for (size_t i = 0; i < sizeof(T); ++i) { // reverse byte order prior to conversion if necessary @@ -10471,13 +10598,6 @@ class basic_json } } - // from http://stackoverflow.com/a/1001328/266378 - static bool little_endianess() - { - int num = 1; - return (*reinterpret_cast(&num) == 1); - } - private: /// whether we can assume little endianess const bool is_little_endian = true; @@ -10928,12 +11048,19 @@ class basic_json // scan functions ///////////////////// - // must be called after \u was read; returns following xxxx as hex or -1 when error + /*! + @brief get codepoint from 4 hex characters following `\u` + + @return codepoint or -1 in case of an error (e.g. EOF or non-hex + character) + */ int get_codepoint() { + // this function only makes sense after reading `\u` assert(current == 'u'); int codepoint = 0; + // byte 1: \uXxxx switch (get()) { case '0': @@ -10993,6 +11120,7 @@ class basic_json return -1; } + // byte 2: \uxXxx switch (get()) { case '0': @@ -11052,6 +11180,7 @@ class basic_json return -1; } + // byte 3: \uxxXx switch (get()) { case '0': @@ -11111,6 +11240,7 @@ class basic_json return -1; } + // byte 4: \uxxxX switch (get()) { case '0': @@ -11173,6 +11303,10 @@ class basic_json return codepoint; } + /*! + @brief create diagnostic representation of a codepoint + @return string "U+XXXX" for codepoint XXXX + */ static std::string codepoint_to_string(int codepoint) { std::stringstream ss; @@ -11180,6 +11314,20 @@ class basic_json return ss.str(); } + /*! + @brief scan a string literal + + This function scans a string according to Sect. 7 of RFC 7159. While + scanning, bytes are escaped and copied into buffer yytext. When the + function returns successfully, yytext is null-terminated and yylen + contains the number of bytes in the string.
+ + @return token_type::value_string if string could be successfully + scanned, token_type::parse_error otherwise + + @note In case of errors, variable error_message contains a textual + description. + */ token_type scan_string() { // reset yytext (ignore opening quote) @@ -11714,6 +11862,17 @@ class basic_json } /*! + @brief scan a number literal + + This function scans a number according to Sect. 6 of RFC 7159. + + The function is realized with a deterministic finite state machine + derived from the grammar described in RFC 7159. Starting in state + "init", the input is read and used to determine the next state. Only + state "done" accepts the number. State "error" is a trap state to model + errors. In the table below, "anything" means any character but the ones + listed before. + state | 0 | 1-9 | e E | + | - | . | anything ---------|----------|----------|----------|---------|---------|----------|----------- init | zero | any1 | [error] | [error] | minus | [error] | [error] @@ -11725,13 +11884,31 @@ class basic_json exponent | any2 | any2 | [error] | sign | sign | [error] | [error] sign | any2 | any2 | [error] | [error] | [error] | [error] | [error] any2 | any2 | any2 | done | done | done | done | done + + The state machine is realized with one label per state (prefixed with + "scan_number_") and `goto` statements between them. The state machine + contains cycles, but any cycle can be left when EOF is read. Therefore, + the function is guaranteed to terminate. + + During scanning, the read bytes are stored in yytext. This string is + then converted to a signed integer, an unsigned integer, or a + floating-point number. + + @return token_type::value_unsigned, token_type::value_integer, or + token_type::value_float if number could be successfully scanned, + token_type::parse_error otherwise + + @note The scanner is independent of the current locale. Internally, the + locale's decimal point is used instead of `.` to work with the + locale-dependent converters.
*/ token_type scan_number() { + // reset yytext to store the number's bytes reset(); - // the type of the parsed number; initially set to unsigned; will - // be changed if minus sign, decimal point or exponent is read + // the type of the parsed number; initially set to unsigned; will be + // changed if minus sign, decimal point or exponent is read token_type number_type = token_type::value_unsigned; // state (init): we just found out we need to scan a number @@ -12008,7 +12185,8 @@ scan_number_any2: } scan_number_done: - // unget the character after the number + // unget the character after the number (we only read it to know + // that we are done scanning a number) --chars_read; next_unget = true; @@ -12155,7 +12333,7 @@ scan_number_done: const std::string get_string() { // yytext cannot be returned as char*, because it may contain a - // null byte + // null byte (parsed as "\u0000") return std::string(yytext.data(), yylen); } @@ -12302,7 +12480,7 @@ scan_number_done: number_float_t value_float = 0; /// the decimal point - const char decimal_point_char = '\0'; + const char decimal_point_char = '.'; }; /*! From 4aedae400ecb0c99424d2f6721db40b5816c1f44 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 23 Apr 2017 15:12:50 +0200 Subject: [PATCH 37/44] :white_check_mark: added exhaustive UTF-8 tests Creates all well-formed sequences of bytes up to length 4. Furthermore, creates ill-formed sequences by removing required trailing bytes or changing bytes. As the tests can take a lot of time, preprocessor symbols are introduced. --- test/src/unit-unicode.cpp | 1052 +++++++++++++++++++++++++++++++++++++ 1 file changed, 1052 insertions(+) diff --git a/test/src/unit-unicode.cpp b/test/src/unit-unicode.cpp index 9fff61af..d889a87b 100644 --- a/test/src/unit-unicode.cpp +++ b/test/src/unit-unicode.cpp @@ -34,6 +34,1058 @@ using nlohmann::json; #include +TEST_CASE("RFC 3629") +{ + /* + RFC 3629 describes in Sect. 
4 the syntax of UTF-8 byte sequences as + follows: + + A UTF-8 string is a sequence of octets representing a sequence of UCS + characters. An octet sequence is valid UTF-8 only if it matches the + following syntax, which is derived from the rules for encoding UTF-8 + and is expressed in the ABNF of [RFC2234]. + + UTF8-octets = *( UTF8-char ) + UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4 + UTF8-1 = %x00-7F + UTF8-2 = %xC2-DF UTF8-tail + UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / + %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) + UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / + %xF4 %x80-8F 2( UTF8-tail ) + UTF8-tail = %x80-BF + */ + + auto create_string = [](int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1) + { + std::string result = "\"" + std::string(1, static_cast(byte1)); + if (byte2 != -1) + { + result += std::string(1, static_cast(byte2)); + } + if (byte3 != -1) + { + result += std::string(1, static_cast(byte3)); + } + if (byte4 != -1) + { + result += std::string(1, static_cast(byte4)); + } + result += "\""; + return result; + }; + + SECTION("ill-formed first byte") + { + for (int byte1 = 0x80; byte1 <= 0xC1; ++byte1) + { + const auto json_string = create_string(byte1); + CAPTURE(byte1); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + + for (int byte1 = 0xF5; byte1 <= 0xFF; ++byte1) + { + const auto json_string = create_string(byte1); + CAPTURE(byte1); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + + SECTION("UTF8-1 (x00-x7F)") + { + SECTION("well-formed") + { + for (int byte1 = 0x00; byte1 <= 0x7F; ++byte1) + { + const auto json_string = create_string(byte1); + CAPTURE(byte1); + CAPTURE(json_string); + + // unescaped control characters are parse errors in JSON + if (0x00 <= byte1 and byte1 <= 0x1F) + { + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + continue; + } + + // a single quote is a parse error 
in JSON + if (byte1 == 0x22) + { + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + continue; + } + + // a single backslash is a parse error in JSON + if (byte1 == 0x5C) + { + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + continue; + } + + // all other characters are OK + CHECK_NOTHROW(json::parse(json_string)); + } + } + } + + SECTION("UTF8-2 (xC2-xDF UTF8-tail)") + { + SECTION("well-formed") + { + for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + const auto json_string = create_string(byte1, byte2); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(json_string); + CHECK_NOTHROW(json::parse(json_string)); + } + } + } + + SECTION("ill-formed: missing second byte") + { + for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1) + { + const auto json_string = create_string(byte1); + CAPTURE(byte1); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + +#ifdef WRONG_SECOND + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0x80 <= byte2 and byte2 <= 0xBF) + { + continue; + } + + const auto json_string = create_string(byte1, byte2); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } +#endif + } + + SECTION("UTF8-3 (xE0 xA0-BF UTF8-tail)") + { + SECTION("well-formed") + { + for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) + { + for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + const auto json_string = create_string(byte1, byte2, byte3); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(json_string); + CHECK_NOTHROW(json::parse(json_string)); + } + } + } + } + + SECTION("ill-formed: missing second byte") + { + for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) + { + const 
auto json_string = create_string(byte1); + CAPTURE(byte1); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + + SECTION("ill-formed: missing third byte") + { + for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) + { + for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2) + { + const auto json_string = create_string(byte1, byte2); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + +#ifdef WRONG_SECOND + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0xA0 <= byte2 and byte2 <= 0xBF) + { + continue; + } + + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + const auto json_string = create_string(byte1, byte2, byte3); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } +#endif + +#ifdef WRONG_THIRD + SECTION("ill-formed: wrong third byte") + { + for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) + { + for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte2) + { + // skip correct third byte + if (0x80 <= byte3 and byte3 <= 0xBF) + { + continue; + } + + const auto json_string = create_string(byte1, byte2, byte3); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } +#endif + } + + SECTION("UTF8-3 (xE1-xEC UTF8-tail UTF8-tail)") + { + SECTION("well-formed") + { + for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + const auto json_string = create_string(byte1, byte2, byte3); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(json_string); + 
CHECK_NOTHROW(json::parse(json_string)); + } + } + } + } + + SECTION("ill-formed: missing second byte") + { + for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) + { + const auto json_string = create_string(byte1); + CAPTURE(byte1); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + + SECTION("ill-formed: missing third byte") + { + for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + const auto json_string = create_string(byte1, byte2); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + +#ifdef WRONG_SECOND + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0x80 <= byte2 and byte2 <= 0xBF) + { + continue; + } + + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + const auto json_string = create_string(byte1, byte2, byte3); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } +#endif + +#ifdef WRONG_THIRD + SECTION("ill-formed: wrong third byte") + { + for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte2) + { + // skip correct third byte + if (0x80 <= byte3 and byte3 <= 0xBF) + { + continue; + } + + const auto json_string = create_string(byte1, byte2, byte3); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } +#endif + } + + SECTION("UTF8-3 (xED x80-9F UTF8-tail)") + { + SECTION("well-formed") + { + for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; 
++byte3) + { + const auto json_string = create_string(byte1, byte2, byte3); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(json_string); + CHECK_NOTHROW(json::parse(json_string)); + } + } + } + } + + SECTION("ill-formed: missing second byte") + { + for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) + { + const auto json_string = create_string(byte1); + CAPTURE(byte1); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + + SECTION("ill-formed: missing third byte") + { + for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2) + { + const auto json_string = create_string(byte1, byte2); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + +#ifdef WRONG_SECOND + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0x80 <= byte2 and byte2 <= 0x9F) + { + continue; + } + + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + const auto json_string = create_string(byte1, byte2, byte3); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } +#endif + +#ifdef WRONG_THIRD + SECTION("ill-formed: wrong third byte") + { + for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2) + { + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte2) + { + // skip correct third byte + if (0x80 <= byte3 and byte3 <= 0xBF) + { + continue; + } + + const auto json_string = create_string(byte1, byte2, byte3); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } +#endif + } + + SECTION("UTF8-3 (xEE-xEF UTF8-tail UTF8-tail)") + { + 
SECTION("well-formed") + { + for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + const auto json_string = create_string(byte1, byte2, byte3); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(json_string); + CHECK_NOTHROW(json::parse(json_string)); + } + } + } + } + + SECTION("ill-formed: missing second byte") + { + for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) + { + const auto json_string = create_string(byte1); + CAPTURE(byte1); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + + SECTION("ill-formed: missing third byte") + { + for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + const auto json_string = create_string(byte1, byte2); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + +#ifdef WRONG_SECOND + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0x80 <= byte2 and byte2 <= 0xBF) + { + continue; + } + + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + const auto json_string = create_string(byte1, byte2, byte3); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } +#endif + +#ifdef WRONG_THIRD + SECTION("ill-formed: wrong third byte") + { + for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte2) + { + // skip correct third byte + if (0x80 <= byte3 and byte3 <= 0xBF) + { + continue; + } + + const auto json_string = create_string(byte1, byte2, byte3); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + 
CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } +#endif + } + + SECTION("UTF8-4 (xF0 x90-BF UTF8-tail UTF8-tail)") + { + SECTION("well-formed") + { + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + { + for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + const auto json_string = create_string(byte1, byte2, byte3, byte4); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(byte4); + CAPTURE(json_string); + CHECK_NOTHROW(json::parse(json_string)); + } + } + } + } + } + + SECTION("ill-formed: missing second byte") + { + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + { + const auto json_string = create_string(byte1); + CAPTURE(byte1); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + + SECTION("ill-formed: missing third byte") + { + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + { + for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) + { + const auto json_string = create_string(byte1, byte2); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + + SECTION("ill-formed: missing fourth byte") + { + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + { + for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + const auto json_string = create_string(byte1, byte2, byte3); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } + +#ifdef WRONG_SECOND + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0x90 <= byte2 and byte2 <= 0xBF) + { + continue; + } + + for (int byte3 = 0x80; byte3 <= 0xBF; 
++byte3) + { + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + const auto json_string = create_string(byte1, byte2, byte3, byte4); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(byte4); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } + } +#endif + +#ifdef WRONG_THIRD + SECTION("ill-formed: wrong third byte") + { + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + { + for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte2) + { + // skip correct third byte + if (0x80 <= byte3 and byte3 <= 0xBF) + { + continue; + } + + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + const auto json_string = create_string(byte1, byte2, byte3, byte4); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(byte4); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } + } +#endif + +#ifdef WRONG_FOURTH + SECTION("ill-formed: wrong fourth byte") + { + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + { + for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + { + for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4) + { + // skip correct second byte + if (0x80 <= byte3 and byte3 <= 0xBF) + { + continue; + } + + const auto json_string = create_string(byte1, byte2, byte3, byte4); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(byte4); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } + } + } +#endif + } + + SECTION("UTF8-4 (xF1-F3 UTF8-tail UTF8-tail UTF8-tail)") + { + SECTION("well-formed") + { + for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + const auto json_string = create_string(byte1, byte2, byte3, byte4); + CAPTURE(byte1); + 
CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(byte4); + CAPTURE(json_string); + CHECK_NOTHROW(json::parse(json_string)); + } + } + } + } + } + + SECTION("ill-formed: missing second byte") + { + for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) + { + const auto json_string = create_string(byte1); + CAPTURE(byte1); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + + SECTION("ill-formed: missing third byte") + { + for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + const auto json_string = create_string(byte1, byte2); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + + SECTION("ill-formed: missing fourth byte") + { + for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + const auto json_string = create_string(byte1, byte2, byte3); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } + +#ifdef WRONG_SECOND + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0x80 <= byte2 and byte2 <= 0xBF) + { + continue; + } + + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + const auto json_string = create_string(byte1, byte2, byte3, byte4); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(byte4); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } + } +#endif + +#ifdef WRONG_THIRD + SECTION("ill-formed: wrong third byte") + { + for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + for 
(int byte3 = 0x00; byte3 <= 0xFF; ++byte2) + { + // skip correct third byte + if (0x80 <= byte3 and byte3 <= 0xBF) + { + continue; + } + + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + const auto json_string = create_string(byte1, byte2, byte3, byte4); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(byte4); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } + } +#endif + +#ifdef WRONG_FOURTH + SECTION("ill-formed: wrong fourth byte") + { + for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4) + { + // skip correct second byte + if (0x80 <= byte3 and byte3 <= 0xBF) + { + continue; + } + + const auto json_string = create_string(byte1, byte2, byte3, byte4); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(byte4); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } + } +#endif + } + + SECTION("UTF8-4 (xF4 x80-8F UTF8-tail UTF8-tail)") + { + SECTION("well-formed") + { + for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + const auto json_string = create_string(byte1, byte2, byte3, byte4); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(byte4); + CAPTURE(json_string); + CHECK_NOTHROW(json::parse(json_string)); + } + } + } + } + } + + SECTION("ill-formed: missing second byte") + { + for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) + { + const auto json_string = create_string(byte1); + CAPTURE(byte1); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + + SECTION("ill-formed: missing third byte") + { + for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) + { + for (int byte2 
= 0x80; byte2 <= 0x8F; ++byte2) + { + const auto json_string = create_string(byte1, byte2); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + + SECTION("ill-formed: missing fourth byte") + { + for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + const auto json_string = create_string(byte1, byte2, byte3); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } + +#ifdef WRONG_SECOND + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0x80 <= byte2 and byte2 <= 0x8F) + { + continue; + } + + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + const auto json_string = create_string(byte1, byte2, byte3, byte4); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(byte4); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } + } +#endif + +#ifdef WRONG_THIRD + SECTION("ill-formed: wrong third byte") + { + for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) + { + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte2) + { + // skip correct third byte + if (0x80 <= byte3 and byte3 <= 0xBF) + { + continue; + } + + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + const auto json_string = create_string(byte1, byte2, byte3, byte4); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(byte4); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } + } +#endif + +#ifdef WRONG_FOURTH + SECTION("ill-formed: wrong fourth byte") + { + for (int 
byte1 = 0xF4; byte1 <= 0xF4; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4) + { + // skip correct second byte + if (0x80 <= byte3 and byte3 <= 0xBF) + { + continue; + } + + const auto json_string = create_string(byte1, byte2, byte3, byte4); + CAPTURE(byte1); + CAPTURE(byte2); + CAPTURE(byte3); + CAPTURE(byte4); + CAPTURE(json_string); + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + } + } + } + } + } +#endif + } +} + TEST_CASE("Unicode", "[hide]") { /* NOTE: to_unicode is not used any more From 5febd04a26a6afccf1c335e012b443503c9ef068 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 23 Apr 2017 18:22:35 +0200 Subject: [PATCH 38/44] :bug: fixed test suite --- test/src/unit-unicode.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/src/unit-unicode.cpp b/test/src/unit-unicode.cpp index d889a87b..a0eef991 100644 --- a/test/src/unit-unicode.cpp +++ b/test/src/unit-unicode.cpp @@ -34,7 +34,7 @@ using nlohmann::json; #include -TEST_CASE("RFC 3629") +TEST_CASE("RFC 3629", "[hide]") { /* RFC 3629 describes in Sect. 
4 the syntax of UTF-8 byte sequences as @@ -264,7 +264,7 @@ TEST_CASE("RFC 3629") { for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2) { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte2) + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) { // skip correct third byte if (0x80 <= byte3 and byte3 <= 0xBF) @@ -366,7 +366,7 @@ TEST_CASE("RFC 3629") { for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte2) + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) { // skip correct third byte if (0x80 <= byte3 and byte3 <= 0xBF) @@ -468,7 +468,7 @@ TEST_CASE("RFC 3629") { for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2) { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte2) + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) { // skip correct third byte if (0x80 <= byte3 and byte3 <= 0xBF) @@ -570,7 +570,7 @@ TEST_CASE("RFC 3629") { for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte2) + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) { // skip correct third byte if (0x80 <= byte3 and byte3 <= 0xBF) @@ -699,7 +699,7 @@ TEST_CASE("RFC 3629") { for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte2) + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) { // skip correct third byte if (0x80 <= byte3 and byte3 <= 0xBF) @@ -865,7 +865,7 @@ TEST_CASE("RFC 3629") { for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte2) + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) { // skip correct third byte if (0x80 <= byte3 and byte3 <= 0xBF) @@ -1029,7 +1029,7 @@ TEST_CASE("RFC 3629") { for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte2) + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) { // skip correct third byte if (0x80 <= byte3 and byte3 <= 0xBF) From b686cc6ad980dd5aa58978424b8554a368678fb6 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 23 Apr 2017 18:34:14 +0200 Subject: [PATCH 39/44] 
:hammer: removed #ifdefs --- test/src/unit-unicode.cpp | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/test/src/unit-unicode.cpp b/test/src/unit-unicode.cpp index a0eef991..f2f2f83c 100644 --- a/test/src/unit-unicode.cpp +++ b/test/src/unit-unicode.cpp @@ -159,7 +159,6 @@ TEST_CASE("RFC 3629", "[hide]") } } -#ifdef WRONG_SECOND SECTION("ill-formed: wrong second byte") { for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1) @@ -180,7 +179,6 @@ TEST_CASE("RFC 3629", "[hide]") } } } -#endif } SECTION("UTF8-3 (xE0 xA0-BF UTF8-tail)") @@ -230,7 +228,6 @@ TEST_CASE("RFC 3629", "[hide]") } } -#ifdef WRONG_SECOND SECTION("ill-formed: wrong second byte") { for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) @@ -255,9 +252,7 @@ TEST_CASE("RFC 3629", "[hide]") } } } -#endif -#ifdef WRONG_THIRD SECTION("ill-formed: wrong third byte") { for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) @@ -282,7 +277,6 @@ TEST_CASE("RFC 3629", "[hide]") } } } -#endif } SECTION("UTF8-3 (xE1-xEC UTF8-tail UTF8-tail)") @@ -332,7 +326,6 @@ TEST_CASE("RFC 3629", "[hide]") } } -#ifdef WRONG_SECOND SECTION("ill-formed: wrong second byte") { for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) @@ -357,9 +350,7 @@ TEST_CASE("RFC 3629", "[hide]") } } } -#endif -#ifdef WRONG_THIRD SECTION("ill-formed: wrong third byte") { for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) @@ -384,7 +375,6 @@ TEST_CASE("RFC 3629", "[hide]") } } } -#endif } SECTION("UTF8-3 (xED x80-9F UTF8-tail)") @@ -434,7 +424,6 @@ TEST_CASE("RFC 3629", "[hide]") } } -#ifdef WRONG_SECOND SECTION("ill-formed: wrong second byte") { for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) @@ -459,9 +448,7 @@ TEST_CASE("RFC 3629", "[hide]") } } } -#endif -#ifdef WRONG_THIRD SECTION("ill-formed: wrong third byte") { for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) @@ -486,7 +473,6 @@ TEST_CASE("RFC 3629", "[hide]") } } } -#endif } SECTION("UTF8-3 (xEE-xEF UTF8-tail UTF8-tail)") @@ -536,7 +522,6 @@ TEST_CASE("RFC 3629", "[hide]") } } -#ifdef 
WRONG_SECOND SECTION("ill-formed: wrong second byte") { for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) @@ -561,9 +546,7 @@ TEST_CASE("RFC 3629", "[hide]") } } } -#endif -#ifdef WRONG_THIRD SECTION("ill-formed: wrong third byte") { for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) @@ -588,7 +571,6 @@ TEST_CASE("RFC 3629", "[hide]") } } } -#endif } SECTION("UTF8-4 (xF0 x90-BF UTF8-tail UTF8-tail)") @@ -661,7 +643,6 @@ TEST_CASE("RFC 3629", "[hide]") } } -#ifdef WRONG_SECOND SECTION("ill-formed: wrong second byte") { for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) @@ -690,9 +671,7 @@ TEST_CASE("RFC 3629", "[hide]") } } } -#endif -#ifdef WRONG_THIRD SECTION("ill-formed: wrong third byte") { for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) @@ -721,9 +700,7 @@ TEST_CASE("RFC 3629", "[hide]") } } } -#endif -#ifdef WRONG_FOURTH SECTION("ill-formed: wrong fourth byte") { for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) @@ -754,7 +731,6 @@ TEST_CASE("RFC 3629", "[hide]") } } } -#endif } SECTION("UTF8-4 (xF1-F3 UTF8-tail UTF8-tail UTF8-tail)") @@ -827,7 +803,6 @@ TEST_CASE("RFC 3629", "[hide]") } } -#ifdef WRONG_SECOND SECTION("ill-formed: wrong second byte") { for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) @@ -856,9 +831,7 @@ TEST_CASE("RFC 3629", "[hide]") } } } -#endif -#ifdef WRONG_THIRD SECTION("ill-formed: wrong third byte") { for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) @@ -887,9 +860,7 @@ TEST_CASE("RFC 3629", "[hide]") } } } -#endif -#ifdef WRONG_FOURTH SECTION("ill-formed: wrong fourth byte") { for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) @@ -918,7 +889,6 @@ TEST_CASE("RFC 3629", "[hide]") } } } -#endif } SECTION("UTF8-4 (xF4 x80-8F UTF8-tail UTF8-tail)") @@ -991,7 +961,6 @@ TEST_CASE("RFC 3629", "[hide]") } } -#ifdef WRONG_SECOND SECTION("ill-formed: wrong second byte") { for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) @@ -1020,9 +989,7 @@ TEST_CASE("RFC 3629", "[hide]") } } } -#endif -#ifdef WRONG_THIRD SECTION("ill-formed: wrong third byte") { for (int byte1 = 0xF4; 
byte1 <= 0xF4; ++byte1) @@ -1051,9 +1018,7 @@ TEST_CASE("RFC 3629", "[hide]") } } } -#endif -#ifdef WRONG_FOURTH SECTION("ill-formed: wrong fourth byte") { for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) @@ -1082,7 +1047,6 @@ TEST_CASE("RFC 3629", "[hide]") } } } -#endif } } From 01e05d89e9e15e37219ec40090b1e37f3ae75f90 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 23 Apr 2017 18:40:17 +0200 Subject: [PATCH 40/44] :hammer: fixed a compiler warning Default arguments are forbidden in lambdas. --- test/src/unit-unicode.cpp | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/test/src/unit-unicode.cpp b/test/src/unit-unicode.cpp index f2f2f83c..93df6808 100644 --- a/test/src/unit-unicode.cpp +++ b/test/src/unit-unicode.cpp @@ -34,6 +34,25 @@ using nlohmann::json; #include +std::string create_string(int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1) +{ + std::string result = "\"" + std::string(1, static_cast(byte1)); + if (byte2 != -1) + { + result += std::string(1, static_cast(byte2)); + } + if (byte3 != -1) + { + result += std::string(1, static_cast(byte3)); + } + if (byte4 != -1) + { + result += std::string(1, static_cast(byte4)); + } + result += "\""; + return result; +} + TEST_CASE("RFC 3629", "[hide]") { /* @@ -56,25 +75,6 @@ TEST_CASE("RFC 3629", "[hide]") UTF8-tail = %x80-BF */ - auto create_string = [](int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1) - { - std::string result = "\"" + std::string(1, static_cast(byte1)); - if (byte2 != -1) - { - result += std::string(1, static_cast(byte2)); - } - if (byte3 != -1) - { - result += std::string(1, static_cast(byte3)); - } - if (byte4 != -1) - { - result += std::string(1, static_cast(byte4)); - } - result += "\""; - return result; - }; - SECTION("ill-formed first byte") { for (int byte1 = 0x80; byte1 <= 0xC1; ++byte1) From 734297ff45bcc1474cfdc8e02ea1222f41d40890 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 23 Apr 2017 20:32:05 
+0200 Subject: [PATCH 41/44] :hammer: cleanup --- README.md | 2 +- test/src/unit-unicode.cpp | 360 +++++++++----------------------------- 2 files changed, 82 insertions(+), 280 deletions(-) diff --git a/README.md b/README.md index 265ec06f..fbae1a79 100644 --- a/README.md +++ b/README.md @@ -897,7 +897,7 @@ $ make json_unit -Ctest $ ./test/json_unit "*" =============================================================================== -All tests passed (11203022 assertions in 48 test cases) +All tests passed (13391115 assertions in 49 test cases) ``` Alternatively, you can use [CMake](https://cmake.org) and run diff --git a/test/src/unit-unicode.cpp b/test/src/unit-unicode.cpp index 93df6808..7b94db67 100644 --- a/test/src/unit-unicode.cpp +++ b/test/src/unit-unicode.cpp @@ -34,23 +34,44 @@ using nlohmann::json; #include -std::string create_string(int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1) +// create and check a JSON string with up to four UTF-8 bytes +void check_utf8string(bool success_expected, int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1) { - std::string result = "\"" + std::string(1, static_cast(byte1)); + std::string json_string = "\""; + + CAPTURE(byte1); + json_string += std::string(1, static_cast(byte1)); + if (byte2 != -1) { - result += std::string(1, static_cast(byte2)); + CAPTURE(byte2); + json_string += std::string(1, static_cast(byte2)); } + if (byte3 != -1) { - result += std::string(1, static_cast(byte3)); + CAPTURE(byte3); + json_string += std::string(1, static_cast(byte3)); } + if (byte4 != -1) { - result += std::string(1, static_cast(byte4)); + CAPTURE(byte4); + json_string += std::string(1, static_cast(byte4)); + } + + json_string += "\""; + + CAPTURE(json_string); + + if (success_expected) + { + CHECK_NOTHROW(json::parse(json_string)); + } + else + { + CHECK_THROWS_AS(json::parse(json_string), json::parse_error); } - result += "\""; - return result; } TEST_CASE("RFC 3629", "[hide]") @@ -79,18 +100,12 @@ TEST_CASE("RFC 
3629", "[hide]") { for (int byte1 = 0x80; byte1 <= 0xC1; ++byte1) { - const auto json_string = create_string(byte1); - CAPTURE(byte1); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1); } for (int byte1 = 0xF5; byte1 <= 0xFF; ++byte1) { - const auto json_string = create_string(byte1); - CAPTURE(byte1); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1); } } @@ -100,33 +115,29 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte1 = 0x00; byte1 <= 0x7F; ++byte1) { - const auto json_string = create_string(byte1); - CAPTURE(byte1); - CAPTURE(json_string); - // unescaped control characters are parse errors in JSON if (0x00 <= byte1 and byte1 <= 0x1F) { - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1); continue; } // a single quote is a parse error in JSON if (byte1 == 0x22) { - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1); continue; } // a single backslash is a parse error in JSON if (byte1 == 0x5C) { - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1); continue; } // all other characters are OK - CHECK_NOTHROW(json::parse(json_string)); + check_utf8string(true, byte1); } } } @@ -139,11 +150,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) { - const auto json_string = create_string(byte1, byte2); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(json_string); - CHECK_NOTHROW(json::parse(json_string)); + check_utf8string(true, byte1, byte2); } } } @@ -152,10 +159,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1) { - const auto json_string = create_string(byte1); - CAPTURE(byte1); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1); } } @@ -171,11 +175,7 @@ 
TEST_CASE("RFC 3629", "[hide]") continue; } - const auto json_string = create_string(byte1, byte2); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2); } } } @@ -191,12 +191,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) { - const auto json_string = create_string(byte1, byte2, byte3); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(json_string); - CHECK_NOTHROW(json::parse(json_string)); + check_utf8string(true, byte1, byte2, byte3); } } } @@ -206,10 +201,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) { - const auto json_string = create_string(byte1); - CAPTURE(byte1); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1); } } @@ -219,11 +211,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2) { - const auto json_string = create_string(byte1, byte2); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2); } } } @@ -242,12 +230,7 @@ TEST_CASE("RFC 3629", "[hide]") for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) { - const auto json_string = create_string(byte1, byte2, byte3); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3); } } } @@ -267,12 +250,7 @@ TEST_CASE("RFC 3629", "[hide]") continue; } - const auto json_string = create_string(byte1, byte2, byte3); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3); } } } @@ -289,12 +267,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte3 = 
0x80; byte3 <= 0xBF; ++byte3) { - const auto json_string = create_string(byte1, byte2, byte3); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(json_string); - CHECK_NOTHROW(json::parse(json_string)); + check_utf8string(true, byte1, byte2, byte3); } } } @@ -304,10 +277,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) { - const auto json_string = create_string(byte1); - CAPTURE(byte1); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1); } } @@ -317,11 +287,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) { - const auto json_string = create_string(byte1, byte2); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2); } } } @@ -340,12 +306,7 @@ TEST_CASE("RFC 3629", "[hide]") for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) { - const auto json_string = create_string(byte1, byte2, byte3); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3); } } } @@ -365,12 +326,7 @@ TEST_CASE("RFC 3629", "[hide]") continue; } - const auto json_string = create_string(byte1, byte2, byte3); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3); } } } @@ -387,12 +343,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) { - const auto json_string = create_string(byte1, byte2, byte3); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(json_string); - CHECK_NOTHROW(json::parse(json_string)); + check_utf8string(true, byte1, byte2, byte3); } } } @@ -402,10 +353,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte1 = 
0xED; byte1 <= 0xED; ++byte1) { - const auto json_string = create_string(byte1); - CAPTURE(byte1); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1); } } @@ -415,11 +363,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2) { - const auto json_string = create_string(byte1, byte2); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2); } } } @@ -438,12 +382,7 @@ TEST_CASE("RFC 3629", "[hide]") for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) { - const auto json_string = create_string(byte1, byte2, byte3); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3); } } } @@ -463,12 +402,7 @@ TEST_CASE("RFC 3629", "[hide]") continue; } - const auto json_string = create_string(byte1, byte2, byte3); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3); } } } @@ -485,12 +419,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) { - const auto json_string = create_string(byte1, byte2, byte3); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(json_string); - CHECK_NOTHROW(json::parse(json_string)); + check_utf8string(true, byte1, byte2, byte3); } } } @@ -500,10 +429,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) { - const auto json_string = create_string(byte1); - CAPTURE(byte1); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1); } } @@ -513,11 +439,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) { - const auto 
json_string = create_string(byte1, byte2); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2); } } } @@ -536,12 +458,7 @@ TEST_CASE("RFC 3629", "[hide]") for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) { - const auto json_string = create_string(byte1, byte2, byte3); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3); } } } @@ -561,12 +478,7 @@ TEST_CASE("RFC 3629", "[hide]") continue; } - const auto json_string = create_string(byte1, byte2, byte3); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3); } } } @@ -585,13 +497,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) { - const auto json_string = create_string(byte1, byte2, byte3, byte4); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(byte4); - CAPTURE(json_string); - CHECK_NOTHROW(json::parse(json_string)); + check_utf8string(true, byte1, byte2, byte3, byte4); } } } @@ -602,10 +508,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) { - const auto json_string = create_string(byte1); - CAPTURE(byte1); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1); } } @@ -615,11 +518,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) { - const auto json_string = create_string(byte1, byte2); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2); } } } @@ -632,12 +531,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte3 = 0x80; byte3 <= 0xBF; 
++byte3) { - const auto json_string = create_string(byte1, byte2, byte3); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3); } } } @@ -659,13 +553,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) { - const auto json_string = create_string(byte1, byte2, byte3, byte4); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(byte4); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3, byte4); } } } @@ -688,13 +576,7 @@ TEST_CASE("RFC 3629", "[hide]") for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) { - const auto json_string = create_string(byte1, byte2, byte3, byte4); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(byte4); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3, byte4); } } } @@ -709,24 +591,16 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) { - { for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4) { - // skip correct second byte + // skip fourth second byte if (0x80 <= byte3 and byte3 <= 0xBF) { continue; } - const auto json_string = create_string(byte1, byte2, byte3, byte4); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(byte4); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3, byte4); } - } } } } @@ -745,13 +619,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) { - const auto json_string = create_string(byte1, byte2, byte3, byte4); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(byte4); - CAPTURE(json_string); - CHECK_NOTHROW(json::parse(json_string)); + check_utf8string(true, byte1, byte2, byte3, byte4); } } } @@ 
-762,10 +630,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) { - const auto json_string = create_string(byte1); - CAPTURE(byte1); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1); } } @@ -775,11 +640,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) { - const auto json_string = create_string(byte1, byte2); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2); } } } @@ -792,12 +653,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) { - const auto json_string = create_string(byte1, byte2, byte3); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3); } } } @@ -819,13 +675,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) { - const auto json_string = create_string(byte1, byte2, byte3, byte4); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(byte4); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3, byte4); } } } @@ -848,13 +698,7 @@ TEST_CASE("RFC 3629", "[hide]") for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) { - const auto json_string = create_string(byte1, byte2, byte3, byte4); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(byte4); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3, byte4); } } } @@ -871,19 +715,13 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4) { - // skip correct second byte + // skip correct fourth byte if (0x80 <= byte3 and byte3 <= 0xBF) { continue; } - const 
auto json_string = create_string(byte1, byte2, byte3, byte4); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(byte4); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3, byte4); } } } @@ -903,13 +741,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) { - const auto json_string = create_string(byte1, byte2, byte3, byte4); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(byte4); - CAPTURE(json_string); - CHECK_NOTHROW(json::parse(json_string)); + check_utf8string(true, byte1, byte2, byte3, byte4); } } } @@ -920,10 +752,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) { - const auto json_string = create_string(byte1); - CAPTURE(byte1); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1); } } @@ -933,11 +762,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) { - const auto json_string = create_string(byte1, byte2); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2); } } } @@ -950,12 +775,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) { - const auto json_string = create_string(byte1, byte2, byte3); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3); } } } @@ -977,13 +797,7 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) { - const auto json_string = create_string(byte1, byte2, byte3, byte4); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(byte4); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + 
check_utf8string(false, byte1, byte2, byte3, byte4); } } } @@ -1006,13 +820,7 @@ TEST_CASE("RFC 3629", "[hide]") for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) { - const auto json_string = create_string(byte1, byte2, byte3, byte4); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(byte4); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3, byte4); } } } @@ -1029,19 +837,13 @@ TEST_CASE("RFC 3629", "[hide]") { for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4) { - // skip correct second byte + // skip correct fourth byte if (0x80 <= byte3 and byte3 <= 0xBF) { continue; } - const auto json_string = create_string(byte1, byte2, byte3, byte4); - CAPTURE(byte1); - CAPTURE(byte2); - CAPTURE(byte3); - CAPTURE(byte4); - CAPTURE(json_string); - CHECK_THROWS_AS(json::parse(json_string), json::parse_error); + check_utf8string(false, byte1, byte2, byte3, byte4); } } } From 6d2c0a79287c8013f16c2674d37f52a4ccd93be6 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 23 Apr 2017 22:54:21 +0200 Subject: [PATCH 42/44] :white_check_mark: added more Unicode test cases --- test/src/unit-unicode.cpp | 1237 +++++++++++++++++++------------------ 1 file changed, 631 insertions(+), 606 deletions(-) diff --git a/test/src/unit-unicode.cpp b/test/src/unit-unicode.cpp index 7b94db67..67e97346 100644 --- a/test/src/unit-unicode.cpp +++ b/test/src/unit-unicode.cpp @@ -74,523 +74,525 @@ void check_utf8string(bool success_expected, int byte1, int byte2 = -1, int byte } } -TEST_CASE("RFC 3629", "[hide]") +TEST_CASE("Unicode", "[hide]") { - /* - RFC 3629 describes in Sect. 4 the syntax of UTF-8 byte sequences as - follows: - - A UTF-8 string is a sequence of octets representing a sequence of UCS - characters. An octet sequence is valid UTF-8 only if it matches the - following syntax, which is derived from the rules for encoding UTF-8 - and is expressed in the ABNF of [RFC2234]. 
- - UTF8-octets = *( UTF8-char ) - UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4 - UTF8-1 = %x00-7F - UTF8-2 = %xC2-DF UTF8-tail - UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / - %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) - UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / - %xF4 %x80-8F 2( UTF8-tail ) - UTF8-tail = %x80-BF - */ - - SECTION("ill-formed first byte") + SECTION("RFC 3629") { - for (int byte1 = 0x80; byte1 <= 0xC1; ++byte1) - { - check_utf8string(false, byte1); - } + /* + RFC 3629 describes in Sect. 4 the syntax of UTF-8 byte sequences as + follows: - for (int byte1 = 0xF5; byte1 <= 0xFF; ++byte1) - { - check_utf8string(false, byte1); - } - } + A UTF-8 string is a sequence of octets representing a sequence of UCS + characters. An octet sequence is valid UTF-8 only if it matches the + following syntax, which is derived from the rules for encoding UTF-8 + and is expressed in the ABNF of [RFC2234]. - SECTION("UTF8-1 (x00-x7F)") - { - SECTION("well-formed") + UTF8-octets = *( UTF8-char ) + UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4 + UTF8-1 = %x00-7F + UTF8-2 = %xC2-DF UTF8-tail + UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / + %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) + UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / + %xF4 %x80-8F 2( UTF8-tail ) + UTF8-tail = %x80-BF + */ + + SECTION("ill-formed first byte") { - for (int byte1 = 0x00; byte1 <= 0x7F; ++byte1) + for (int byte1 = 0x80; byte1 <= 0xC1; ++byte1) { - // unescaped control characters are parse errors in JSON - if (0x00 <= byte1 and byte1 <= 0x1F) + check_utf8string(false, byte1); + } + + for (int byte1 = 0xF5; byte1 <= 0xFF; ++byte1) + { + check_utf8string(false, byte1); + } + } + + SECTION("UTF8-1 (x00-x7F)") + { + SECTION("well-formed") + { + for (int byte1 = 0x00; byte1 <= 0x7F; ++byte1) + { + // unescaped control characters are parse errors in JSON + if (0x00 <= byte1 and byte1 <= 0x1F) + { + check_utf8string(false, byte1); 
+ continue; + } + + // a single quote is a parse error in JSON + if (byte1 == 0x22) + { + check_utf8string(false, byte1); + continue; + } + + // a single backslash is a parse error in JSON + if (byte1 == 0x5C) + { + check_utf8string(false, byte1); + continue; + } + + // all other characters are OK + check_utf8string(true, byte1); + } + } + } + + SECTION("UTF8-2 (xC2-xDF UTF8-tail)") + { + SECTION("well-formed") + { + for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + check_utf8string(true, byte1, byte2); + } + } + } + + SECTION("ill-formed: missing second byte") + { + for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1) { check_utf8string(false, byte1); - continue; } + } - // a single quote is a parse error in JSON - if (byte1 == 0x22) + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0x80 <= byte2 and byte2 <= 0xBF) + { + continue; + } + + check_utf8string(false, byte1, byte2); + } + } + } + } + + SECTION("UTF8-3 (xE0 xA0-BF UTF8-tail)") + { + SECTION("well-formed") + { + for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) + { + for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(true, byte1, byte2, byte3); + } + } + } + } + + SECTION("ill-formed: missing second byte") + { + for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) { check_utf8string(false, byte1); - continue; } + } - // a single backslash is a parse error in JSON - if (byte1 == 0x5C) + SECTION("ill-formed: missing third byte") + { + for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) + { + for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2) + { + check_utf8string(false, byte1, byte2); + } + } + } + + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct 
second byte + if (0xA0 <= byte2 and byte2 <= 0xBF) + { + continue; + } + + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(false, byte1, byte2, byte3); + } + } + } + } + + SECTION("ill-formed: wrong third byte") + { + for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) + { + for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) + { + // skip correct third byte + if (0x80 <= byte3 and byte3 <= 0xBF) + { + continue; + } + + check_utf8string(false, byte1, byte2, byte3); + } + } + } + } + } + + SECTION("UTF8-3 (xE1-xEC UTF8-tail UTF8-tail)") + { + SECTION("well-formed") + { + for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(true, byte1, byte2, byte3); + } + } + } + } + + SECTION("ill-formed: missing second byte") + { + for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) { check_utf8string(false, byte1); - continue; - } - - // all other characters are OK - check_utf8string(true, byte1); - } - } - } - - SECTION("UTF8-2 (xC2-xDF UTF8-tail)") - { - SECTION("well-formed") - { - for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) - { - check_utf8string(true, byte1, byte2); } } - } - SECTION("ill-formed: missing second byte") - { - for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1) + SECTION("ill-formed: missing third byte") { - check_utf8string(false, byte1); - } - } - - SECTION("ill-formed: wrong second byte") - { - for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1) - { - for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) { - // skip correct second byte - if (0x80 <= byte2 and byte2 <= 0xBF) + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) { - continue; - } - - check_utf8string(false, byte1, byte2); - } - } - } - } - - SECTION("UTF8-3 (xE0 xA0-BF UTF8-tail)") - { - SECTION("well-formed") - { - for (int byte1 
= 0xE0; byte1 <= 0xE0; ++byte1) - { - for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - check_utf8string(true, byte1, byte2, byte3); + check_utf8string(false, byte1, byte2); } } } - } - SECTION("ill-formed: missing second byte") - { - for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) + SECTION("ill-formed: wrong second byte") { - check_utf8string(false, byte1); - } - } - - SECTION("ill-formed: missing third byte") - { - for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) - { - for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2) + for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) { - check_utf8string(false, byte1, byte2); - } - } - } - - SECTION("ill-formed: wrong second byte") - { - for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) - { - for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) - { - // skip correct second byte - if (0xA0 <= byte2 and byte2 <= 0xBF) + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) { - continue; - } - - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - check_utf8string(false, byte1, byte2, byte3); - } - } - } - } - - SECTION("ill-formed: wrong third byte") - { - for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) - { - for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) - { - // skip correct third byte - if (0x80 <= byte3 and byte3 <= 0xBF) + // skip correct second byte + if (0x80 <= byte2 and byte2 <= 0xBF) { continue; } - check_utf8string(false, byte1, byte2, byte3); + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(false, byte1, byte2, byte3); + } } } } - } - } - SECTION("UTF8-3 (xE1-xEC UTF8-tail UTF8-tail)") - { - SECTION("well-formed") - { - for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) + SECTION("ill-formed: wrong third byte") { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) { - 
check_utf8string(true, byte1, byte2, byte3); + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) + { + // skip correct third byte + if (0x80 <= byte3 and byte3 <= 0xBF) + { + continue; + } + + check_utf8string(false, byte1, byte2, byte3); + } } } } } - SECTION("ill-formed: missing second byte") + SECTION("UTF8-3 (xED x80-9F UTF8-tail)") { - for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) + SECTION("well-formed") { - check_utf8string(false, byte1); - } - } - - SECTION("ill-formed: missing third byte") - { - for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) { - check_utf8string(false, byte1, byte2); - } - } - } - - SECTION("ill-formed: wrong second byte") - { - for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) - { - for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) - { - // skip correct second byte - if (0x80 <= byte2 and byte2 <= 0xBF) + for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2) { - continue; - } - - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - check_utf8string(false, byte1, byte2, byte3); + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(true, byte1, byte2, byte3); + } } } } - } - SECTION("ill-formed: wrong third byte") - { - for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) + SECTION("ill-formed: missing second byte") { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) + check_utf8string(false, byte1); + } + } + + SECTION("ill-formed: missing third byte") + { + for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2) { - // skip correct third byte - if (0x80 <= byte3 and byte3 <= 0xBF) + check_utf8string(false, byte1, byte2); + } + } + } + + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip 
correct second byte + if (0x80 <= byte2 and byte2 <= 0x9F) { continue; } - check_utf8string(false, byte1, byte2, byte3); + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(false, byte1, byte2, byte3); + } } } } - } - } - SECTION("UTF8-3 (xED x80-9F UTF8-tail)") - { - SECTION("well-formed") - { - for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) + SECTION("ill-formed: wrong third byte") { - for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2) + for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2) { - check_utf8string(true, byte1, byte2, byte3); + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) + { + // skip correct third byte + if (0x80 <= byte3 and byte3 <= 0xBF) + { + continue; + } + + check_utf8string(false, byte1, byte2, byte3); + } } } } } - SECTION("ill-formed: missing second byte") + SECTION("UTF8-3 (xEE-xEF UTF8-tail UTF8-tail)") { - for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) + SECTION("well-formed") { - check_utf8string(false, byte1); - } - } - - SECTION("ill-formed: missing third byte") - { - for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2) + for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) { - check_utf8string(false, byte1, byte2); - } - } - } - - SECTION("ill-formed: wrong second byte") - { - for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) - { - for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) - { - // skip correct second byte - if (0x80 <= byte2 and byte2 <= 0x9F) + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) { - continue; - } - - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - check_utf8string(false, byte1, byte2, byte3); + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(true, byte1, byte2, byte3); + } } } } - } - SECTION("ill-formed: wrong third byte") - { - for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) + SECTION("ill-formed: missing second byte") { - for (int byte2 = 
0x80; byte2 <= 0x9F; ++byte2) + for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) + check_utf8string(false, byte1); + } + } + + SECTION("ill-formed: missing third byte") + { + for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) { - // skip correct third byte - if (0x80 <= byte3 and byte3 <= 0xBF) + check_utf8string(false, byte1, byte2); + } + } + } + + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0x80 <= byte2 and byte2 <= 0xBF) { continue; } - check_utf8string(false, byte1, byte2, byte3); + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(false, byte1, byte2, byte3); + } } } } - } - } - SECTION("UTF8-3 (xEE-xEF UTF8-tail UTF8-tail)") - { - SECTION("well-formed") - { - for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) + SECTION("ill-formed: wrong third byte") { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) { - check_utf8string(true, byte1, byte2, byte3); + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) + { + // skip correct third byte + if (0x80 <= byte3 and byte3 <= 0xBF) + { + continue; + } + + check_utf8string(false, byte1, byte2, byte3); + } } } } } - SECTION("ill-formed: missing second byte") + SECTION("UTF8-4 (xF0 x90-BF UTF8-tail UTF8-tail)") { - for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) + SECTION("well-formed") { - check_utf8string(false, byte1); - } - } - - SECTION("ill-formed: missing third byte") - { - for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) { - check_utf8string(false, byte1, byte2); - } - } - } - - SECTION("ill-formed: wrong second byte") - 
{ - for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) - { - for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) - { - // skip correct second byte - if (0x80 <= byte2 and byte2 <= 0xBF) + for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) { - continue; - } - - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - check_utf8string(false, byte1, byte2, byte3); + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + check_utf8string(true, byte1, byte2, byte3, byte4); + } + } } } } - } - SECTION("ill-formed: wrong third byte") - { - for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) + SECTION("ill-formed: missing second byte") { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) + check_utf8string(false, byte1); + } + } + + SECTION("ill-formed: missing third byte") + { + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + { + for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) { - // skip correct third byte - if (0x80 <= byte3 and byte3 <= 0xBF) + check_utf8string(false, byte1, byte2); + } + } + } + + SECTION("ill-formed: missing fourth byte") + { + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + { + for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(false, byte1, byte2, byte3); + } + } + } + } + + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0x90 <= byte2 and byte2 <= 0xBF) { continue; } - check_utf8string(false, byte1, byte2, byte3); - } - } - } - } - } - - SECTION("UTF8-4 (xF0 x90-BF UTF8-tail UTF8-tail)") - { - SECTION("well-formed") - { - for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) - { - for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - for (int byte4 = 0x80; byte4 <= 0xBF; 
++byte4) + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) { - check_utf8string(true, byte1, byte2, byte3, byte4); + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + check_utf8string(false, byte1, byte2, byte3, byte4); + } } } } } - } - SECTION("ill-formed: missing second byte") - { - for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + SECTION("ill-formed: wrong third byte") { - check_utf8string(false, byte1); - } - } - - SECTION("ill-formed: missing third byte") - { - for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) - { - for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) { - check_utf8string(false, byte1, byte2); - } - } - } - - SECTION("ill-formed: missing fourth byte") - { - for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) - { - for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) { - check_utf8string(false, byte1, byte2, byte3); - } - } - } - } - - SECTION("ill-formed: wrong second byte") - { - for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) - { - for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) - { - // skip correct second byte - if (0x90 <= byte2 and byte2 <= 0xBF) - { - continue; - } - - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) { - check_utf8string(false, byte1, byte2, byte3, byte4); + // skip correct third byte + if (0x80 <= byte3 and byte3 <= 0xBF) + { + continue; + } + + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + check_utf8string(false, byte1, byte2, byte3, byte4); + } } } } } - } - SECTION("ill-formed: wrong third byte") - { - for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + SECTION("ill-formed: wrong fourth byte") { - for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) + for (int byte2 = 0x90; byte2 <= 0xBF; 
++byte2) { - // skip correct third byte - if (0x80 <= byte3 and byte3 <= 0xBF) + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) { - continue; - } - - for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) - { - check_utf8string(false, byte1, byte2, byte3, byte4); - } - } - } - } - } - - SECTION("ill-formed: wrong fourth byte") - { - for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) - { - for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4) { // skip fourth second byte @@ -601,269 +603,263 @@ TEST_CASE("RFC 3629", "[hide]") check_utf8string(false, byte1, byte2, byte3, byte4); } - } - } - } - } - } - - SECTION("UTF8-4 (xF1-F3 UTF8-tail UTF8-tail UTF8-tail)") - { - SECTION("well-formed") - { - for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) - { - check_utf8string(true, byte1, byte2, byte3, byte4); } } } } } - SECTION("ill-formed: missing second byte") + SECTION("UTF8-4 (xF1-F3 UTF8-tail UTF8-tail UTF8-tail)") { - for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) + SECTION("well-formed") { - check_utf8string(false, byte1); - } - } - - SECTION("ill-formed: missing third byte") - { - for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) { - check_utf8string(false, byte1, byte2); - } - } - } - - SECTION("ill-formed: missing fourth byte") - { - for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) { - check_utf8string(false, byte1, byte2, byte3); - } - } - } - } - - SECTION("ill-formed: wrong second byte") - { - for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) - { - for (int byte2 = 0x00; byte2 <= 
0xFF; ++byte2) - { - // skip correct second byte - if (0x80 <= byte2 and byte2 <= 0xBF) - { - continue; - } - - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) { - check_utf8string(false, byte1, byte2, byte3, byte4); + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + check_utf8string(true, byte1, byte2, byte3, byte4); + } } } } } - } - SECTION("ill-formed: wrong third byte") - { - for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) + SECTION("ill-formed: missing second byte") { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) + check_utf8string(false, byte1); + } + } + + SECTION("ill-formed: missing third byte") + { + for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) { - // skip correct third byte - if (0x80 <= byte3 and byte3 <= 0xBF) + check_utf8string(false, byte1, byte2); + } + } + } + + SECTION("ill-formed: missing fourth byte") + { + for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(false, byte1, byte2, byte3); + } + } + } + } + + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0x80 <= byte2 and byte2 <= 0xBF) { continue; } - for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) { - check_utf8string(false, byte1, byte2, byte3, byte4); + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + check_utf8string(false, byte1, byte2, byte3, byte4); + } } } } } - } - SECTION("ill-formed: wrong fourth byte") - { - for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) + SECTION("ill-formed: wrong third byte") { - for (int byte2 = 
0x80; byte2 <= 0xBF; ++byte2) + for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) { - for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4) + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) { - // skip correct fourth byte + // skip correct third byte if (0x80 <= byte3 and byte3 <= 0xBF) { continue; } - check_utf8string(false, byte1, byte2, byte3, byte4); + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + check_utf8string(false, byte1, byte2, byte3, byte4); + } } } } } - } - } - SECTION("UTF8-4 (xF4 x80-8F UTF8-tail UTF8-tail)") - { - SECTION("well-formed") - { - for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) + SECTION("ill-formed: wrong fourth byte") { - for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) + for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) { - for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) { - check_utf8string(true, byte1, byte2, byte3, byte4); + for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4) + { + // skip correct fourth byte + if (0x80 <= byte3 and byte3 <= 0xBF) + { + continue; + } + + check_utf8string(false, byte1, byte2, byte3, byte4); + } } } } } } - SECTION("ill-formed: missing second byte") + SECTION("UTF8-4 (xF4 x80-8F UTF8-tail UTF8-tail)") { - for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) + SECTION("well-formed") { - check_utf8string(false, byte1); - } - } - - SECTION("ill-formed: missing third byte") - { - for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) + for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) { - check_utf8string(false, byte1, byte2); - } - } - } - - SECTION("ill-formed: missing fourth byte") - { - for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; 
++byte3) + for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) { - check_utf8string(false, byte1, byte2, byte3); - } - } - } - } - - SECTION("ill-formed: wrong second byte") - { - for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) - { - for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) - { - // skip correct second byte - if (0x80 <= byte2 and byte2 <= 0x8F) - { - continue; - } - - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) { - check_utf8string(false, byte1, byte2, byte3, byte4); + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + check_utf8string(true, byte1, byte2, byte3, byte4); + } } } } } - } - SECTION("ill-formed: wrong third byte") - { - for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) + SECTION("ill-formed: missing second byte") { - for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) + for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) + check_utf8string(false, byte1); + } + } + + SECTION("ill-formed: missing third byte") + { + for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) { - // skip correct third byte - if (0x80 <= byte3 and byte3 <= 0xBF) + check_utf8string(false, byte1, byte2); + } + } + } + + SECTION("ill-formed: missing fourth byte") + { + for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(false, byte1, byte2, byte3); + } + } + } + } + + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0x80 <= byte2 and byte2 <= 0x8F) { continue; } - for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) { - check_utf8string(false, byte1, byte2, byte3, byte4); + for (int byte4 = 
0x80; byte4 <= 0xBF; ++byte4) + { + check_utf8string(false, byte1, byte2, byte3, byte4); + } } } } } - } - SECTION("ill-formed: wrong fourth byte") - { - for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) + SECTION("ill-formed: wrong third byte") { - for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) + for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) { - for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4) + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) { - // skip correct fourth byte + // skip correct third byte if (0x80 <= byte3 and byte3 <= 0xBF) { continue; } - check_utf8string(false, byte1, byte2, byte3, byte4); + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + check_utf8string(false, byte1, byte2, byte3, byte4); + } + } + } + } + } + + SECTION("ill-formed: wrong fourth byte") + { + for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4) + { + // skip correct fourth byte + if (0x80 <= byte3 and byte3 <= 0xBF) + { + continue; + } + + check_utf8string(false, byte1, byte2, byte3, byte4); + } } } } } } } -} -TEST_CASE("Unicode", "[hide]") -{ - /* NOTE: to_unicode is not used any more - SECTION("full enumeration of Unicode code points") + SECTION("\\uxxxx sequences") { - // lexer to call to_unicode on - json::lexer dummy_lexer("", 0); - // create an escaped string from a code point const auto codepoint_to_unicode = [](std::size_t cp) { - // copd points are represented as a six-character sequence: a + // code points are represented as a six-character sequence: a // reverse solidus, followed by the lowercase letter u, followed // by four hexadecimal digits that encode the character's code // point @@ -872,72 +868,101 @@ TEST_CASE("Unicode", "[hide]") return ss.str(); }; - // generate all UTF-8 code points; in total, 1112064 code 
points are - // generated: 0x1FFFFF code points - 2048 invalid values between - // 0xD800 and 0xDFFF. - for (std::size_t cp = 0; cp <= 0x10FFFFu; ++cp) + SECTION("correct sequences") { - // The Unicode standard permanently reserves these code point - // values for UTF-16 encoding of the high and low surrogates, and - // they will never be assigned a character, so there should be no - // reason to encode them. The official Unicode standard says that - // no UTF forms, including UTF-16, can encode these code points. - if (cp >= 0xD800u and cp <= 0xDFFFu) + // generate all UTF-8 code points; in total, 1112064 code points are + // generated: 0x110000 code points - 2048 invalid values between + // 0xD800 and 0xDFFF. + for (std::size_t cp = 0; cp <= 0x10FFFFu; ++cp) { - // if we would not skip these code points, we would get a - // "missing low surrogate" exception - continue; - } + // string to store the code point as in \uxxxx format + std::string json_text = "\""; - // string to store the code point as in \uxxxx format - std::string escaped_string; - // string to store the code point as unescaped character sequence - std::string unescaped_string; - - if (cp < 0x10000u) - { - // code points in the Basic Multilingual Plane can be - // represented with one \\uxxxx sequence - escaped_string = codepoint_to_unicode(cp); - - // All Unicode characters may be placed within the quotation - // marks, except for the characters that must be escaped: - // quotation mark, reverse solidus, and the control characters - // (U+0000 through U+001F); we ignore these code points as - // they are checked with codepoint_to_unicode.
- if (cp > 0x1f and cp != 0x22 and cp != 0x5c) + // decide whether to use one or two \uxxxx sequences + if (cp < 0x10000u) { - unescaped_string = dummy_lexer.to_unicode(cp); + // The Unicode standard permanently reserves these code point + // values for UTF-16 encoding of the high and low surrogates, and + // they will never be assigned a character, so there should be no + // reason to encode them. The official Unicode standard says that + // no UTF forms, including UTF-16, can encode these code points. + if (cp >= 0xD800u and cp <= 0xDFFFu) + { + // if we would not skip these code points, we would get a + // "missing low surrogate" exception + continue; + } + + // code points in the Basic Multilingual Plane can be + // represented with one \uxxxx sequence + json_text += codepoint_to_unicode(cp); + } + else + { + // To escape an extended character that is not in the Basic + // Multilingual Plane, the character is represented as a + // 12-character sequence, encoding the UTF-16 surrogate pair + const auto codepoint1 = 0xd800u + (((cp - 0x10000u) >> 10) & 0x3ffu); + const auto codepoint2 = 0xdc00u + ((cp - 0x10000u) & 0x3ffu); + json_text += codepoint_to_unicode(codepoint1) + codepoint_to_unicode(codepoint2); + } + + json_text += "\""; + CAPTURE(json_text); + CHECK_NOTHROW(json::parse(json_text)); + } + } + + SECTION("incorrect sequences") + { + SECTION("high surrogate without low surrogate") + { + // D800..DBFF are high surrogates and must be followed by low + // surrogates DC00..DFFF; here, nothing follows + for (std::size_t cp = 0xD800u; cp <= 0xDBFFu; ++cp) + { + std::string json_text = "\"" + codepoint_to_unicode(cp) + "\""; + CAPTURE(json_text); + CHECK_THROWS_AS(json::parse(json_text), json::parse_error); } } - else + +#if 0 + SECTION("high surrogate with wrong low surrogate") { - // To escape an extended character that is not in the Basic - // Multilingual Plane, the character is represented as a - // 12-character sequence, encoding the UTF-16 surrogate pair 
- const auto codepoint1 = 0xd800u + (((cp - 0x10000u) >> 10) & 0x3ffu); - const auto codepoint2 = 0xdc00u + ((cp - 0x10000u) & 0x3ffu); - escaped_string = codepoint_to_unicode(codepoint1); - escaped_string += codepoint_to_unicode(codepoint2); - unescaped_string += dummy_lexer.to_unicode(codepoint1, codepoint2); + // D800..DBFF are high surrogates and must be followed by low + // surrogates DC00..DFFF; here a different sequence follows + for (std::size_t cp1 = 0xD800u; cp1 <= 0xDBFFu; ++cp1) + { + for (std::size_t cp2 = 0x0000u; cp2 <= 0xFFFFu; ++cp2) + { + if (0xDC00u <= cp2 and cp2 <= 0xDFFFu) + { + continue; + } + + std::string json_text = "\"" + codepoint_to_unicode(cp1) + codepoint_to_unicode(cp2) + "\""; + CAPTURE(json_text); + CHECK_THROWS_AS(json::parse(json_text), json::parse_error); + } + } + } +#endif + + SECTION("low surrogate without high surrogate") + { + // low surrogates DC00..DFFF must follow high surrogates; here, + // they occur alone + for (std::size_t cp = 0xDC00u; cp <= 0xDFFFu; ++cp) + { + std::string json_text = "\"" + codepoint_to_unicode(cp) + "\""; + CAPTURE(json_text); + CHECK_THROWS_AS(json::parse(json_text), json::parse_error); + } } - // all other code points are valid and must not yield parse errors - CAPTURE(cp); - CAPTURE(escaped_string); - CAPTURE(unescaped_string); - - json j1, j2, j3, j4; - CHECK_NOTHROW(j1 = json::parse("\"" + escaped_string + "\"")); - CHECK_NOTHROW(j2 = json::parse(j1.dump())); - CHECK(j1 == j2); - - CHECK_NOTHROW(j3 = json::parse("\"" + unescaped_string + "\"")); - CHECK_NOTHROW(j4 = json::parse(j3.dump())); - CHECK(j3 == j4); } } - */ SECTION("read all unicode characters") { From cfc2e8391cf6cfe098455f9b77eb9c3695bb41c6 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 24 Apr 2017 15:07:43 +0200 Subject: [PATCH 43/44] :hammer: removed too long running tests --- test/src/unit-unicode.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/src/unit-unicode.cpp 
b/test/src/unit-unicode.cpp index 67e97346..120941ee 100644 --- a/test/src/unit-unicode.cpp +++ b/test/src/unit-unicode.cpp @@ -913,6 +913,7 @@ TEST_CASE("Unicode", "[hide]") } } +#if 0 SECTION("incorrect sequences") { SECTION("high surrogate without low surrogate") @@ -927,7 +928,6 @@ TEST_CASE("Unicode", "[hide]") } } -#if 0 SECTION("high surrogate with wrong low surrogate") { // D800..DBFF are high surrogates and must be followed by low @@ -947,7 +947,6 @@ TEST_CASE("Unicode", "[hide]") } } } -#endif SECTION("low surrogate without high surrogate") { @@ -962,6 +961,7 @@ TEST_CASE("Unicode", "[hide]") } } +#endif } SECTION("read all unicode characters") From 8b9f51179e57c758da4a7042831e68901606d5c7 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 24 Apr 2017 17:46:21 +0200 Subject: [PATCH 44/44] :sparkles: started working on #458 a simple acceptor function --- src/json.hpp | 144 +++++++++++ test/src/unit-class_parser.cpp | 445 +++++++++++++++++++++++++++++++++ 2 files changed, 589 insertions(+) diff --git a/src/json.hpp b/src/json.hpp index 99a6e854..a0fe5e8e 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -12501,6 +12501,7 @@ scan_number_done: @brief public parser interface @param[in] strict whether to expect the last token to be EOF + @return parsed JSON value @throw parse_error.101 in case of an unexpected token @throw parse_error.102 if to_unicode fails or surrogate error @@ -12524,6 +12525,30 @@ scan_number_done: return result.is_discarded() ? basic_json() : std::move(result); } + /*! + @brief public accept interface + + @param[in] strict whether to expect the last token to be EOF + @return whether the input is a proper JSON text + */ + bool accept(const bool strict = true) + { + // read first token + get_token(); + + if (not accept_internal()) + { + return false; + } + + if (strict and last_token != lexer::token_type::end_of_input) + { + return false; + } + + return true; + } + private: /*! 
@brief the actual parser @@ -12745,6 +12770,125 @@ scan_number_done: return result; } + /*! + @brief the actual acceptor + */ + bool accept_internal() + { + switch (last_token) + { + case lexer::token_type::begin_object: + { + // read next token + get_token(); + + // closing } -> we are done + if (last_token == lexer::token_type::end_object) + { + get_token(); + return true; + } + + // parse values + while (true) + { + // parse key + if (last_token != lexer::token_type::value_string) + { + return false; + } + + // parse separator (:) + get_token(); + if (last_token != lexer::token_type::name_separator) + { + return false; + } + + // parse value + get_token(); + if (not accept_internal()) + { + return false; + } + + // comma -> next value + if (last_token == lexer::token_type::value_separator) + { + get_token(); + continue; + } + + // closing } + if (last_token != lexer::token_type::end_object) + { + return false; + } + + get_token(); + return true; + } + } + + case lexer::token_type::begin_array: + { + // read next token + get_token(); + + // closing ] -> we are done + if (last_token == lexer::token_type::end_array) + { + get_token(); + return true; + } + + // parse values + while (true) + { + // parse value + if (not accept_internal()) + { + return false; + } + + // comma -> next value + if (last_token == lexer::token_type::value_separator) + { + get_token(); + continue; + } + + // closing ] + if (last_token != lexer::token_type::end_array) + { + return false; + } + + get_token(); + return true; + } + } + + case lexer::token_type::literal_null: + case lexer::token_type::value_string: + case lexer::token_type::literal_true: + case lexer::token_type::literal_false: + case lexer::token_type::value_unsigned: + case lexer::token_type::value_integer: + case lexer::token_type::value_float: + { + get_token(); + return true; + } + + default: + { + // the last token was unexpected + return false; + } + } + } + /// get next token from lexer typename lexer::token_type
get_token() { diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index b631a978..5866b901 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -350,6 +350,268 @@ TEST_CASE("parser class") } } + SECTION("accept") + { + SECTION("null") + { + CHECK(parse_string("null").accept()); + } + + SECTION("true") + { + CHECK(parse_string("true").accept()); + } + + SECTION("false") + { + CHECK(parse_string("false").accept()); + } + + SECTION("array") + { + SECTION("empty array") + { + CHECK(parse_string("[]").accept()); + CHECK(parse_string("[ ]").accept()); + } + + SECTION("nonempty array") + { + CHECK(parse_string("[true, false, null]").accept()); + } + } + + SECTION("object") + { + SECTION("empty object") + { + CHECK(parse_string("{}").accept()); + CHECK(parse_string("{ }").accept()); + } + + SECTION("nonempty object") + { + CHECK(parse_string("{\"\": true, \"one\": 1, \"two\": null}").accept()); + } + } + + SECTION("string") + { + // empty string + CHECK(parse_string("\"\"").accept()); + + SECTION("errors") + { + // error: tab in string + CHECK(parse_string("\"\t\"").accept() == false); + // error: newline in string + CHECK(parse_string("\"\n\"").accept() == false); + CHECK(parse_string("\"\r\"").accept() == false); + // error: backspace in string + CHECK(parse_string("\"\b\"").accept() == false); + // improve code coverage + CHECK(parse_string("\uFF01").accept() == false); + CHECK(parse_string("[-4:1,]").accept() == false); + // unescaped control characters + CHECK(parse_string("\"\x00\"").accept() == false); + CHECK(parse_string("\"\x01\"").accept() == false); + CHECK(parse_string("\"\x02\"").accept() == false); + CHECK(parse_string("\"\x03\"").accept() == false); + CHECK(parse_string("\"\x04\"").accept() == false); + CHECK(parse_string("\"\x05\"").accept() == false); + CHECK(parse_string("\"\x06\"").accept() == false); + CHECK(parse_string("\"\x07\"").accept() == false); + CHECK(parse_string("\"\x08\"").accept() == 
false); + CHECK(parse_string("\"\x09\"").accept() == false); + CHECK(parse_string("\"\x0a\"").accept() == false); + CHECK(parse_string("\"\x0b\"").accept() == false); + CHECK(parse_string("\"\x0c\"").accept() == false); + CHECK(parse_string("\"\x0d\"").accept() == false); + CHECK(parse_string("\"\x0e\"").accept() == false); + CHECK(parse_string("\"\x0f\"").accept() == false); + CHECK(parse_string("\"\x10\"").accept() == false); + CHECK(parse_string("\"\x11\"").accept() == false); + CHECK(parse_string("\"\x12\"").accept() == false); + CHECK(parse_string("\"\x13\"").accept() == false); + CHECK(parse_string("\"\x14\"").accept() == false); + CHECK(parse_string("\"\x15\"").accept() == false); + CHECK(parse_string("\"\x16\"").accept() == false); + CHECK(parse_string("\"\x17\"").accept() == false); + CHECK(parse_string("\"\x18\"").accept() == false); + CHECK(parse_string("\"\x19\"").accept() == false); + CHECK(parse_string("\"\x1a\"").accept() == false); + CHECK(parse_string("\"\x1b\"").accept() == false); + CHECK(parse_string("\"\x1c\"").accept() == false); + CHECK(parse_string("\"\x1d\"").accept() == false); + CHECK(parse_string("\"\x1e\"").accept() == false); + CHECK(parse_string("\"\x1f\"").accept() == false); + } + + SECTION("escaped") + { + // quotation mark "\"" + auto r1 = R"("\"")"_json; + CHECK(parse_string("\"\\\"\"").accept()); + // reverse solidus "\\" + auto r2 = R"("\\")"_json; + CHECK(parse_string("\"\\\\\"").accept()); + // solidus + CHECK(parse_string("\"\\/\"").accept()); + // backspace + CHECK(parse_string("\"\\b\"").accept()); + // formfeed + CHECK(parse_string("\"\\f\"").accept()); + // newline + CHECK(parse_string("\"\\n\"").accept()); + // carriage return + CHECK(parse_string("\"\\r\"").accept()); + // horizontal tab + CHECK(parse_string("\"\\t\"").accept()); + + CHECK(parse_string("\"\\u0001\"").accept()); + CHECK(parse_string("\"\\u000a\"").accept()); + CHECK(parse_string("\"\\u00b0\"").accept()); + CHECK(parse_string("\"\\u0c00\"").accept()); + 
CHECK(parse_string("\"\\ud000\"").accept()); + CHECK(parse_string("\"\\u000E\"").accept()); + CHECK(parse_string("\"\\u00F0\"").accept()); + CHECK(parse_string("\"\\u0100\"").accept()); + CHECK(parse_string("\"\\u2000\"").accept()); + CHECK(parse_string("\"\\uFFFF\"").accept()); + CHECK(parse_string("\"\\u20AC\"").accept()); + CHECK(parse_string("\"€\"").accept()); + CHECK(parse_string("\"🎈\"").accept()); + + CHECK(parse_string("\"\\ud80c\\udc60\"").accept()); + CHECK(parse_string("\"\\ud83c\\udf1e\"").accept()); + } + } + + SECTION("number") + { + SECTION("integers") + { + SECTION("without exponent") + { + CHECK(parse_string("-128").accept()); + CHECK(parse_string("-0").accept()); + CHECK(parse_string("0").accept()); + CHECK(parse_string("128").accept()); + } + + SECTION("with exponent") + { + CHECK(parse_string("0e1").accept()); + CHECK(parse_string("0E1").accept()); + + CHECK(parse_string("10000E-4").accept()); + CHECK(parse_string("10000E-3").accept()); + CHECK(parse_string("10000E-2").accept()); + CHECK(parse_string("10000E-1").accept()); + CHECK(parse_string("10000E0").accept()); + CHECK(parse_string("10000E1").accept()); + CHECK(parse_string("10000E2").accept()); + CHECK(parse_string("10000E3").accept()); + CHECK(parse_string("10000E4").accept()); + + CHECK(parse_string("10000e-4").accept()); + CHECK(parse_string("10000e-3").accept()); + CHECK(parse_string("10000e-2").accept()); + CHECK(parse_string("10000e-1").accept()); + CHECK(parse_string("10000e0").accept()); + CHECK(parse_string("10000e1").accept()); + CHECK(parse_string("10000e2").accept()); + CHECK(parse_string("10000e3").accept()); + CHECK(parse_string("10000e4").accept()); + + CHECK(parse_string("-0e1").accept()); + CHECK(parse_string("-0E1").accept()); + CHECK(parse_string("-0E123").accept()); + } + + SECTION("edge cases") + { + // From RFC7159, Section 6: + // Note that when such software is used, numbers that are + // integers and are in the range [-(2**53)+1, (2**53)-1] + // are interoperable 
in the sense that implementations will + // agree exactly on their numeric values. + + // -(2**53)+1 + CHECK(parse_string("-9007199254740991").accept()); + // (2**53)-1 + CHECK(parse_string("9007199254740991").accept()); + } + + SECTION("over the edge cases") // issue #178 - Integer conversion to unsigned (incorrect handling of 64 bit integers) + { + // While RFC7159, Section 6 specifies a preference for support + // for ranges in range of IEEE 754-2008 binary64 (double precision) + // this does not accommodate 64 bit integers without loss of accuracy. + // As 64 bit integers are now widely used in software, it is desirable + // to expand support to the full 64 bit (signed and unsigned) range + // i.e. -(2**63) -> (2**64)-1. + + // -(2**63) ** Note: compilers see negative literals as negated positive numbers (hence the -1) + CHECK(parse_string("-9223372036854775808").accept()); + // (2**63)-1 + CHECK(parse_string("9223372036854775807").accept()); + // (2**64)-1 + CHECK(parse_string("18446744073709551615").accept()); + } + } + + SECTION("floating-point") + { + SECTION("without exponent") + { + CHECK(parse_string("-128.5").accept()); + CHECK(parse_string("0.999").accept()); + CHECK(parse_string("128.5").accept()); + CHECK(parse_string("-0.0").accept()); + } + + SECTION("with exponent") + { + CHECK(parse_string("-128.5E3").accept()); + CHECK(parse_string("-128.5E-3").accept()); + CHECK(parse_string("-0.0e1").accept()); + CHECK(parse_string("-0.0E1").accept()); + } + } + + SECTION("overflow") + { + // overflows during parsing yield an exception, but are accepted anyway + CHECK(parse_string("1.18973e+4932").accept()); + } + + SECTION("invalid numbers") + { + CHECK(parse_string("01").accept() == false); + CHECK(parse_string("--1").accept() == false); + CHECK(parse_string("1.").accept() == false); + CHECK(parse_string("1E").accept() == false); + CHECK(parse_string("1E-").accept() == false); + CHECK(parse_string("1.E1").accept() == false); + 
CHECK(parse_string("-1E").accept() == false); + CHECK(parse_string("-0E#").accept() == false); + CHECK(parse_string("-0E-#").accept() == false); + CHECK(parse_string("-0#").accept() == false); + CHECK(parse_string("-0.0:").accept() == false); + CHECK(parse_string("-0.0Z").accept() == false); + CHECK(parse_string("-0E123:").accept() == false); + CHECK(parse_string("-0e0-:").accept() == false); + CHECK(parse_string("-0e-:").accept() == false); + CHECK(parse_string("-0f").accept() == false); + + // numbers must not begin with "+" + CHECK(parse_string("+1").accept() == false); + CHECK(parse_string("+0").accept() == false); + } + } + } + SECTION("parse errors") { // unexpected end of number @@ -661,6 +923,189 @@ TEST_CASE("parser class") "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: surrogate U+D80C must be followed by U+DC00..U+DFFF instead of U+FFFF; last read '\"\\uD80C\\uFFFF'"); } + SECTION("parse errors (accept)") + { + // unexpected end of number + CHECK(parse_string("0.").accept() == false); + CHECK(parse_string("-").accept() == false); + CHECK(parse_string("--").accept() == false); + CHECK(parse_string("-0.").accept() == false); + CHECK(parse_string("-.").accept() == false); + CHECK(parse_string("-:").accept() == false); + CHECK(parse_string("0.:").accept() == false); + CHECK(parse_string("e.").accept() == false); + CHECK(parse_string("1e.").accept() == false); + CHECK(parse_string("1e/").accept() == false); + CHECK(parse_string("1e:").accept() == false); + CHECK(parse_string("1E.").accept() == false); + CHECK(parse_string("1E/").accept() == false); + CHECK(parse_string("1E:").accept() == false); + + // unexpected end of null + CHECK(parse_string("n").accept() == false); + CHECK(parse_string("nu").accept() == false); + CHECK(parse_string("nul").accept() == false); + + // unexpected end of true + CHECK(parse_string("t").accept() == false); + CHECK(parse_string("tr").accept() == false); + CHECK(parse_string("tru").accept() == 
false); + + // unexpected end of false + CHECK(parse_string("f").accept() == false); + CHECK(parse_string("fa").accept() == false); + CHECK(parse_string("fal").accept() == false); + CHECK(parse_string("fals").accept() == false); + + // missing/unexpected end of array + CHECK(parse_string("[").accept() == false); + CHECK(parse_string("[1").accept() == false); + CHECK(parse_string("[1,").accept() == false); + CHECK(parse_string("[1,]").accept() == false); + CHECK(parse_string("]").accept() == false); + + // missing/unexpected end of object + CHECK(parse_string("{").accept() == false); + CHECK(parse_string("{\"foo\"").accept() == false); + CHECK(parse_string("{\"foo\":").accept() == false); + CHECK(parse_string("{\"foo\":}").accept() == false); + CHECK(parse_string("{\"foo\":1,}").accept() == false); + CHECK(parse_string("}").accept() == false); + + // missing/unexpected end of string + CHECK(parse_string("\"").accept() == false); + CHECK(parse_string("\"\\\"").accept() == false); + CHECK(parse_string("\"\\u\"").accept() == false); + CHECK(parse_string("\"\\u0\"").accept() == false); + CHECK(parse_string("\"\\u01\"").accept() == false); + CHECK(parse_string("\"\\u012\"").accept() == false); + CHECK(parse_string("\"\\u").accept() == false); + CHECK(parse_string("\"\\u0").accept() == false); + CHECK(parse_string("\"\\u01").accept() == false); + CHECK(parse_string("\"\\u012").accept() == false); + + // invalid escapes + for (int c = 1; c < 128; ++c) + { + auto s = std::string("\"\\") + std::string(1, static_cast<char>(c)) + "\""; + + switch (c) + { + // valid escapes + case ('"'): + case ('\\'): + case ('/'): + case ('b'): + case ('f'): + case ('n'): + case ('r'): + case ('t'): + { + CHECK(parse_string(s.c_str()).accept()); + break; + } + + // \u must be followed by four hex digits, so we skip it here + case ('u'): + { + break; + } + + // any other combination of backslash and character is invalid + default: + { + CHECK(parse_string(s.c_str()).accept() == false); + break; + } + 
} + } + + // invalid \uxxxx escapes + { + // check whether character is a valid hex character + const auto valid = [](int c) + { + switch (c) + { + case ('0'): + case ('1'): + case ('2'): + case ('3'): + case ('4'): + case ('5'): + case ('6'): + case ('7'): + case ('8'): + case ('9'): + case ('a'): + case ('b'): + case ('c'): + case ('d'): + case ('e'): + case ('f'): + case ('A'): + case ('B'): + case ('C'): + case ('D'): + case ('E'): + case ('F'): + { + return true; + } + + default: + { + return false; + } + } + }; + + for (int c = 1; c < 128; ++c) + { + std::string s = "\"\\u"; + + // create a string with the iterated character at each position + auto s1 = s + "000" + std::string(1, static_cast<char>(c)) + "\""; + auto s2 = s + "00" + std::string(1, static_cast<char>(c)) + "0\""; + auto s3 = s + "0" + std::string(1, static_cast<char>(c)) + "00\""; + auto s4 = s + std::string(1, static_cast<char>(c)) + "000\""; + + if (valid(c)) + { + CAPTURE(s1); + CHECK(parse_string(s1.c_str()).accept()); + CAPTURE(s2); + CHECK(parse_string(s2.c_str()).accept()); + CAPTURE(s3); + CHECK(parse_string(s3.c_str()).accept()); + CAPTURE(s4); + CHECK(parse_string(s4.c_str()).accept()); + } + else + { + CAPTURE(s1); + CHECK(parse_string(s1.c_str()).accept() == false); + + CAPTURE(s2); + CHECK(parse_string(s2.c_str()).accept() == false); + + CAPTURE(s3); + CHECK(parse_string(s3.c_str()).accept() == false); + + CAPTURE(s4); + CHECK(parse_string(s4.c_str()).accept() == false); + } + } + } + + // missing part of a surrogate pair + CHECK(parse_string("\"\\uD80C\"").accept() == false); + // invalid surrogate pair + CHECK(parse_string("\"\\uD80C\\uD80C\"").accept() == false); + CHECK(parse_string("\"\\uD80C\\u0000\"").accept() == false); + CHECK(parse_string("\"\\uD80C\\uFFFF\"").accept() == false); + } + SECTION("tests found by mutate++") { // test case to make sure no comma preceeds the first key