From 99e0d8b339fca017c0f3b7a55d85a087bcaace0a Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 26 Mar 2017 15:29:08 +0200 Subject: [PATCH] :hammer: fixed \uxxxx parsing --- src/json.hpp | 37 +++++------ test/src/unit-class_parser.cpp | 115 +++++++++++++++++++++------------ 2 files changed, 90 insertions(+), 62 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 3caa8a6c..6cbcdf28 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -42,6 +42,7 @@ SOFTWARE. #include // forward_list #include // function, hash, less #include // initializer_list +#include // hex #include // istream, ostream #include // advance, begin, back_inserter, bidirectional_iterator_tag, distance, end, inserter, iterator, iterator_traits, next, random_access_iterator_tag, reverse_iterator #include // numeric_limits @@ -10542,36 +10543,30 @@ class basic_json // must be called after \u was read; returns following xxxx as hex or -1 when error int get_codepoint() { - // read xxxx of \uxxxx - std::vector buffer(5, '\0'); + // a mapping to discover hex numbers + static int8_t ascii_to_hex[256] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; + + int codepoint = 0; + for (size_t i = 0; i < 4; ++i) { - get(); - if (JSON_UNLIKELY(current != std::char_traits::eof())) + const int8_t digit = ascii_to_hex[static_cast(get())]; + if (JSON_UNLIKELY(digit == -1)) { - buffer[i] = static_cast(current); + return -1; } else { - // error message will be created by caller - return -1; + codepoint += digit; + } + + if (i != 3) + { + codepoint <<= 4; } } - char* endptr; - errno = 0; - - const int codepoint = static_cast(std::strtoul(buffer.data(), &endptr, 16)); - - if (JSON_LIKELY(errno == 0 and endptr == buffer.data() + 4)) - { - return codepoint; - } - else - { - // conversion incomplete or failure - return -1; - } + return codepoint; } token_type scan_string() diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index fe2a8fbe..01812259 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -91,18 +91,18 @@ TEST_CASE("parser class") // error: tab in string CHECK_THROWS_AS(json::parser("\"\t\"").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("\"\t\"").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"'"); // error: newline in string CHECK_THROWS_AS(json::parser("\"\n\"").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("\"\r\"").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("\"\n\"").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"'"); CHECK_THROWS_WITH(json::parser("\"\r\"").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"'"); // error: backspace in string CHECK_THROWS_AS(json::parser("\"\b\"").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("\"\b\"").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"'"); // improve code coverage CHECK_THROWS_AS(json::parser("\uFF01").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("[-4:1,]").parse(), json::parse_error); @@ -393,34 +393,37 @@ TEST_CASE("parser class") CHECK_THROWS_AS(json::parser("n").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("nu").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("nul").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("n").parse(), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'n'"); + CHECK_THROWS_WITH(json::parser("n").parse(), + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid literal; expected 'null'; last read 'n'"); CHECK_THROWS_WITH(json::parser("nu").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'n'"); + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid literal; expected 'null'; last read 'nu'"); CHECK_THROWS_WITH(json::parser("nul").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'n'"); + "[json.exception.parse_error.101] parse error at 4: syntax error - invalid literal; expected 'null'; last read 'nul'"); // unexpected end of true CHECK_THROWS_AS(json::parser("t").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("tr").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("tru").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("t").parse(), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 't'"); + CHECK_THROWS_WITH(json::parser("t").parse(), + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid literal; expected 'true'; last read 't'"); CHECK_THROWS_WITH(json::parser("tr").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 't'"); + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid literal; expected 'true'; last read 'tr'"); CHECK_THROWS_WITH(json::parser("tru").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 't'"); + "[json.exception.parse_error.101] parse error at 4: syntax error - invalid literal; expected 'true'; last read 'tru'"); // unexpected end of false CHECK_THROWS_AS(json::parser("f").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("fa").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("fal").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("fals").parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser("f").parse(), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'"); + CHECK_THROWS_WITH(json::parser("f").parse(), + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid literal; expected 'false'; last read 'f'"); CHECK_THROWS_WITH(json::parser("fa").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'"); + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid literal; expected 'false'; last read 'fa'"); CHECK_THROWS_WITH(json::parser("fal").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'"); + "[json.exception.parse_error.101] parse error at 4: syntax error - invalid literal; expected 'false'; last read 'fal'"); CHECK_THROWS_WITH(json::parser("fals").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected 'f'"); + "[json.exception.parse_error.101] parse error at 5: syntax error - invalid literal; expected 'false'; last read 'fals'"); // missing/unexpected end of array CHECK_THROWS_AS(json::parser("[").parse(), json::parse_error); @@ -471,25 +474,25 @@ TEST_CASE("parser class") CHECK_THROWS_AS(json::parser("\"\\u01").parse(), json::parse_error); CHECK_THROWS_AS(json::parser("\"\\u012").parse(), json::parse_error); CHECK_THROWS_WITH(json::parser("\"").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: missing closing quote; last read '\"'"); CHECK_THROWS_WITH(json::parser("\"\\\"").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: missing closing quote; last read '\"\\\"'"); CHECK_THROWS_WITH(json::parser("\"\\u\"").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u\"'"); CHECK_THROWS_WITH(json::parser("\"\\u0\"").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 5: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u0\"'"); CHECK_THROWS_WITH(json::parser("\"\\u01\"").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 6: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u01\"'"); CHECK_THROWS_WITH(json::parser("\"\\u012\"").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 7: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u012\"'"); CHECK_THROWS_WITH(json::parser("\"\\u").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u'"); CHECK_THROWS_WITH(json::parser("\"\\u0").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 5: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u0'"); CHECK_THROWS_WITH(json::parser("\"\\u01").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 6: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u01'"); CHECK_THROWS_WITH(json::parser("\"\\u012").parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + "[json.exception.parse_error.101] parse error at 7: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '\"\\u012'"); // invalid escapes for (int c = 1; c < 128; ++c) @@ -522,8 +525,12 @@ TEST_CASE("parser class") default: { CHECK_THROWS_AS(json::parser(s.c_str()).parse(), json::parse_error); - CHECK_THROWS_WITH(json::parser(s.c_str()).parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH(json::parser(s.c_str()).parse(), + "[json.exception.parse_error.101] parse error at 3: syntax error - invalid string: forbidden character after backspace; last read '\"\\" + std::string(1, c) + "'"); + } break; } } @@ -581,26 +588,52 @@ TEST_CASE("parser class") if (valid(c)) { + CAPTURE(s1); CHECK_NOTHROW(json::parser(s1.c_str()).parse()); + CAPTURE(s2); CHECK_NOTHROW(json::parser(s2.c_str()).parse()); + CAPTURE(s3); CHECK_NOTHROW(json::parser(s3.c_str()).parse()); + CAPTURE(s4); CHECK_NOTHROW(json::parser(s4.c_str()).parse()); } else { + CAPTURE(s1); CHECK_THROWS_AS(json::parser(s1.c_str()).parse(), json::parse_error); - CHECK_THROWS_AS(json::parser(s2.c_str()).parse(), json::parse_error); - CHECK_THROWS_AS(json::parser(s3.c_str()).parse(), json::parse_error); - CHECK_THROWS_AS(json::parser(s4.c_str()).parse(), json::parse_error); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH(json::parser(s1.c_str()).parse(), + "[json.exception.parse_error.101] parse error at 7: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s1.substr(0, 7) + "'"); + } - CHECK_THROWS_WITH(json::parser(s1.c_str()).parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); - CHECK_THROWS_WITH(json::parser(s2.c_str()).parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); - CHECK_THROWS_WITH(json::parser(s3.c_str()).parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); - CHECK_THROWS_WITH(json::parser(s4.c_str()).parse(), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected '\"'"); + CAPTURE(s2); + CHECK_THROWS_AS(json::parser(s2.c_str()).parse(), json::parse_error); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH(json::parser(s2.c_str()).parse(), + "[json.exception.parse_error.101] parse error at 6: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s2.substr(0, 6) + "'"); + } + + CAPTURE(s3); + CHECK_THROWS_AS(json::parser(s3.c_str()).parse(), json::parse_error); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH(json::parser(s3.c_str()).parse(), + "[json.exception.parse_error.101] parse error at 5: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s3.substr(0, 5) + "'"); + } + + CAPTURE(s4); + CHECK_THROWS_AS(json::parser(s4.c_str()).parse(), json::parse_error); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH(json::parser(s4.c_str()).parse(), + "[json.exception.parse_error.101] parse error at 4: syntax error - invalid string: '\\u' must be followed by 4 hex digits; last read '" + s4.substr(0, 4) + "'"); + } } } } @@ -608,17 +641,17 @@ TEST_CASE("parser class") // missing part of a surrogate pair CHECK_THROWS_AS(json::parse("\"\\uD80C\""), json::parse_error); CHECK_THROWS_WITH(json::parse("\"\\uD80C\""), - "[json.exception.parse_error.102] parse error at 8: missing low surrogate"); + "[json.exception.parse_error.101] parse error at 8: syntax error - invalid string: missing low surrogate; last read '\"\\uD80C\"'"); // invalid surrogate pair CHECK_THROWS_AS(json::parse("\"\\uD80C\\uD80C\""), json::parse_error); CHECK_THROWS_AS(json::parse("\"\\uD80C\\u0000\""), json::parse_error); CHECK_THROWS_AS(json::parse("\"\\uD80C\\uFFFF\""), json::parse_error); CHECK_THROWS_WITH(json::parse("\"\\uD80C\\uD80C\""), - "[json.exception.parse_error.102] parse error at 14: missing or wrong low surrogate"); + "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\uD80C'"); CHECK_THROWS_WITH(json::parse("\"\\uD80C\\u0000\""), - "[json.exception.parse_error.102] parse error at 14: missing or wrong low surrogate"); + "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\u0000'"); CHECK_THROWS_WITH(json::parse("\"\\uD80C\\uFFFF\""), - "[json.exception.parse_error.102] parse error at 14: missing or wrong low surrogate"); + "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\uFFFF'"); } SECTION("tests found by mutate++")