From e24df7eca98ae8590616c103cafcff9ed3fa5b7c Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 23 Apr 2017 15:10:40 +0200 Subject: [PATCH] :memo: improved documentation --- src/json.hpp | 250 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 214 insertions(+), 36 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 1db82e12..99a6e854 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -6605,6 +6605,7 @@ class basic_json // output adapters // ///////////////////// + /// abstract output adapter interface template class output_adapter { @@ -6629,9 +6630,11 @@ class basic_json } }; + /// a type to simplify interfaces template using output_adapter_t = std::shared_ptr>; + /// output adapter for byte vectors template class output_vector_adapter : public output_adapter { @@ -6654,6 +6657,7 @@ class basic_json std::vector& v; }; + /// output adapter for output streams template class output_stream_adapter : public output_adapter { @@ -6676,6 +6680,7 @@ class basic_json std::basic_ostream& stream; }; + /// output adapter for basic_string template class output_string_adapter : public output_adapter { @@ -8773,7 +8778,7 @@ class basic_json } }; - // a type to simplify interfaces + /// a type to simplify interfaces using input_adapter_t = std::shared_ptr; /// input adapter for cached stream input @@ -8930,17 +8935,34 @@ class basic_json /// @{ private: + /*! + @brief deserialization of CBOR and MessagePack values + */ class binary_reader { public: + /*! + @brief create a binary reader + + @param[in] adapter input adapter to read from + */ explicit binary_reader(input_adapter_t adapter) : ia(adapter), is_little_endian(little_endianess()) - {} + { + assert(ia); + } /*! 
+ @brief create a JSON value from CBOR input + @param[in] get_char whether a new character should be retrieved from the input (true, default) or whether the last read character should be considered instead + + @return JSON value created from CBOR input + + @throw parse_error.110 if input ended unexpectedly + @throw parse_error.112 if unsupported byte was read */ basic_json parse_cbor(const bool get_char = true) { @@ -9297,12 +9319,13 @@ class basic_json check_eof(); // code from RFC 7049, Appendix D, Figure 3: - // As half-precision floating-point numbers were only added to - // IEEE 754 in 2008, today's programming platforms often still - // only have limited support for them. It is very easy to - // include at least decoding support for them even without such - // support. An example of a small decoder for half-precision - // floating-point numbers in the C language is shown in Fig. 3. + // As half-precision floating-point numbers were only added + // to IEEE 754 in 2008, today's programming platforms often + // still only have limited support for them. It is very + // easy to include at least decoding support for them even + // without such support. An example of a small decoder for + // half-precision floating-point numbers in the C language + // is shown in Fig. 3. const int half = (byte1 << 8) + byte2; const int exp = (half >> 10) & 0x1f; const int mant = half & 0x3ff; @@ -9343,6 +9366,14 @@ class basic_json } } + /*! + @brief create a JSON value from MessagePack input + + @return JSON value created from MessagePack input + + @throw parse_error.110 if input ended unexpectedly + @throw parse_error.112 if unsupported byte was read + */ basic_json parse_msgpack() { switch (get()) @@ -9745,23 +9776,52 @@ class basic_json } } - private: - // from http://stackoverflow.com/a/1001328/266378 - static bool little_endianess() + /*! 
+ @brief determine system byte order + + @return true iff system's byte order is little endian + + @note from http://stackoverflow.com/a/1001328/266378 + */ + static bool little_endianess() noexcept { int num = 1; return (*reinterpret_cast(&num) == 1); } + private: + /*! + @brief get next character from the input + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns + `std::char_traits::eof()` in that case. + + @return character read from the input + */ int get() { ++chars_read; return (current = ia->get_character()); } + /*! + @brief read a number from the input + + @tparam T the type of the number + + @return number of type @a T + + @note This function needs to respect the system's endianness, because + bytes in CBOR and MessagePack are stored in network order (big + endian) and therefore need reordering on little endian systems. + + @throw parse_error.110 if input has less than `sizeof(T)` bytes + */ template T get_number() { + // step 1: read input into array with system's byte order std::array vec; for (size_t i = 0; i < sizeof(T); ++i) { @@ -9779,11 +9839,21 @@ class basic_json } } + // step 2: convert array into number of type T and return T result; std::memcpy(&result, vec.data(), sizeof(T)); return result; } + /*! + @brief create a string by reading characters from the input + + @param[in] len number of bytes to read + + @return string created by reading @a len bytes + + @throw parse_error.110 if input has less than @a len bytes + */ std::string get_string(const size_t len) { std::string result; @@ -9796,6 +9866,18 @@ class basic_json return result; } + /*! + @brief reads a CBOR string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. + Additionally, CBOR's strings with indefinite lengths are supported. 
+ + @return string + + @throw parse_error.110 if input ended + @throw parse_error.113 if an unexpected byte is read + */ std::string get_cbor_string() { check_eof(); @@ -9876,6 +9958,17 @@ class basic_json } } + /*! + @brief reads a MessagePack string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. + + @return string + + @throw parse_error.110 if input ended + @throw parse_error.113 if an unexpected byte is read + */ std::string get_msgpack_string() { check_eof(); @@ -9947,7 +10040,11 @@ class basic_json } } - void check_eof() + /*! + @brief check if input ended + @throw parse_error.110 if input ended + */ + void check_eof() const { if (JSON_UNLIKELY(current == std::char_traits::eof())) { @@ -9969,17 +10066,26 @@ class basic_json const bool is_little_endian = true; }; + /*! + @brief serialization to CBOR and MessagePack values + */ class binary_writer { public: - binary_writer() - : is_little_endian(little_endianess()) - {} + /*! + @brief create a binary writer + @param[in] adapter output adapter to write to + */ explicit binary_writer(output_adapter_t adapter) - : is_little_endian(little_endianess()), oa(adapter) - {} + : is_little_endian(binary_reader::little_endianess()), oa(adapter) + { + assert(oa); + } + /*! 
+ @param[in] j JSON value to serialize + */ void write_cbor(const basic_json& j) { switch (j.type()) @@ -10100,6 +10206,7 @@ class basic_json case value_t::string: { + // step 1: write control byte and the string length const auto N = j.m_value.string->size(); if (N <= 0x17) { @@ -10128,7 +10235,7 @@ class basic_json } // LCOV_EXCL_STOP - // append string + // step 2: write the string oa->write_characters(reinterpret_cast(j.m_value.string->c_str()), j.m_value.string->size()); break; @@ -10136,6 +10243,7 @@ class basic_json case value_t::array: { + // step 1: write control byte and the array size const auto N = j.m_value.array->size(); if (N <= 0x17) { @@ -10164,7 +10272,7 @@ class basic_json } // LCOV_EXCL_STOP - // append each element + // step 2: write each element for (const auto& el : *j.m_value.array) { write_cbor(el); @@ -10174,6 +10282,7 @@ class basic_json case value_t::object: { + // step 1: write control byte and the object size const auto N = j.m_value.object->size(); if (N <= 0x17) { @@ -10202,7 +10311,7 @@ class basic_json } // LCOV_EXCL_STOP - // append each element + // step 2: write each element for (const auto& el : *j.m_value.object) { write_cbor(el.first); @@ -10218,6 +10327,9 @@ class basic_json } } + /*! 
+ @param[in] j JSON value to serialize + */ void write_msgpack(const basic_json& j) { switch (j.type()) @@ -10353,6 +10465,7 @@ class basic_json case value_t::string: { + // step 1: write control byte and the string length const auto N = j.m_value.string->size(); if (N <= 31) { @@ -10378,7 +10491,7 @@ class basic_json write_number(static_cast(N)); } - // append string + // step 2: write the string oa->write_characters(reinterpret_cast(j.m_value.string->c_str()), j.m_value.string->size()); break; @@ -10386,6 +10499,7 @@ class basic_json case value_t::array: { + // step 1: write control byte and the array size const auto N = j.m_value.array->size(); if (N <= 15) { @@ -10405,7 +10519,7 @@ class basic_json write_number(static_cast(N)); } - // append each element + // step 2: write each element for (const auto& el : *j.m_value.array) { write_msgpack(el); @@ -10415,6 +10529,7 @@ class basic_json case value_t::object: { + // step 1: write control byte and the object size const auto N = j.m_value.object->size(); if (N <= 15) { @@ -10434,7 +10549,7 @@ class basic_json write_number(static_cast(N)); } - // append each element + // step 2: write each element for (const auto& el : *j.m_value.object) { write_msgpack(el.first); @@ -10451,12 +10566,24 @@ class basic_json } private: + /*! + @brief write a number to the output + + @param[in] n number of type @a T + @tparam T the type of the number + + @note This function needs to respect the system's endianness, because + bytes in CBOR and MessagePack are stored in network order (big + endian) and therefore need reordering on little endian systems. 
+ */ template void write_number(T n) { + // step 1: write number to array of length sizeof(T) std::array vec; std::memcpy(vec.data(), &n, sizeof(T)); + // step 2: write array to output (with possible reordering) for (size_t i = 0; i < sizeof(T); ++i) { // reverse byte order prior to conversion if necessary @@ -10471,13 +10598,6 @@ class basic_json } } - // from http://stackoverflow.com/a/1001328/266378 - static bool little_endianess() - { - int num = 1; - return (*reinterpret_cast(&num) == 1); - } - private: /// whether we can assume little endianess const bool is_little_endian = true; @@ -10928,12 +11048,19 @@ class basic_json // scan functions ///////////////////// - // must be called after \u was read; returns following xxxx as hex or -1 when error + /*! + @brief get codepoint from 4 hex characters following `\u` + + @return codepoint or -1 in case of an error (e.g. EOF or non-hex + character) + */ int get_codepoint() { + // this function only makes sense after reading `\u` assert(current == 'u'); int codepoint = 0; + // byte 1: \uXxxx switch (get()) { case '0': @@ -10993,6 +11120,7 @@ class basic_json return -1; } + // byte 2: \uxXxx switch (get()) { case '0': @@ -11052,6 +11180,7 @@ class basic_json return -1; } + // byte 3: \uxxXx switch (get()) { case '0': @@ -11111,6 +11240,7 @@ class basic_json return -1; } + // byte 4: \uxxxX switch (get()) { case '0': @@ -11173,6 +11303,10 @@ class basic_json return codepoint; } + /*! + @brief create diagnostic representation of a codepoint + @return string "U+XXXX" for codepoint XXXX + */ static std::string codepoint_to_string(int codepoint) { std::stringstream ss; @@ -11180,6 +11314,20 @@ class basic_json return ss.str(); } + /*! + @brief scan a string literal + + This function scans a string according to Sect. 7 of RFC 7159. While + scanning, bytes are escaped and copied into buffer yytext. When the + function returns successfully, yytext is null-terminated and yylen + contains the number of bytes in the string. 
+ + @return token_type::value_string if string could be successfully + scanned, token_type::parse_error otherwise + + @note In case of errors, variable error_message contains a textual + description. + */ token_type scan_string() { // reset yytext (ignore opening quote) @@ -11714,6 +11862,17 @@ class basic_json } /*! + @brief scan a number literal + + This function scans a number according to Sect. 6 of RFC 7159. + + The function is realized with a deterministic finite state machine + derived from the grammar described in RFC 7159. Starting in state + "init", the input is read and used to determine the next state. Only + state "done" accepts the number. State "error" is a trap state to model + errors. In the table below, "anything" means any character but the ones + listed before. + state | 0 | 1-9 | e E | + | - | . | anything ---------|----------|----------|----------|---------|---------|----------|----------- init | zero | any1 | [error] | [error] | minus | [error] | [error] @@ -11725,13 +11884,31 @@ class basic_json exponent | any2 | any2 | [error] | sign | sign | [error] | [error] sign | any2 | any2 | [error] | [error] | [error] | [error] | [error] any2 | any2 | any2 | done | done | done | done | done + + The state machine is realized with one label per state (prefixed with + "scan_number_") and `goto` statements between them. The state machine + contains cycles, but any cycle can be left when EOF is read. Therefore, + the function is guaranteed to terminate. + + During scanning, the read bytes are stored in yytext. This string is + then converted to a signed integer, an unsigned integer, or a + floating-point number. + + @return token_type::value_unsigned, token_type::value_integer, or + token_type::value_float if number could be successfully scanned, + token_type::parse_error otherwise + + @note The scanner is independent of the current locale. Internally, the + locale's decimal point is used instead of `.` to work with the + locale-dependent converters. 
*/ token_type scan_number() { + // reset yytext to store the number's bytes reset(); - // the type of the parsed number; initially set to unsigned; will - // be changed if minus sign, decimal point or exponent is read + // the type of the parsed number; initially set to unsigned; will be + // changed if minus sign, decimal point or exponent is read token_type number_type = token_type::value_unsigned; // state (init): we just found out we need to scan a number @@ -12008,7 +12185,8 @@ scan_number_any2: } scan_number_done: - // unget the character after the number + // unget the character after the number (we only read it to know + // that we are done scanning a number) --chars_read; next_unget = true; @@ -12155,7 +12333,7 @@ scan_number_done: const std::string get_string() { // yytext cannot be returned as char*, because it may contain a - // null byte + // null byte (parsed as "\u0000") return std::string(yytext.data(), yylen); } @@ -12302,7 +12480,7 @@ scan_number_done: number_float_t value_float = 0; /// the decimal point - const char decimal_point_char = '\0'; + const char decimal_point_char = '.'; }; /*!