🔨 improved diagnostic output
This commit is contained in:
		
							parent
							
								
									717106eced
								
							
						
					
					
						commit
						db9bf953f3
					
				
					 2 changed files with 39 additions and 28 deletions
				
			
		
							
								
								
									
										51
									
								
								src/json.hpp
									
										
									
									
									
								
							
							
						
						
									
										51
									
								
								src/json.hpp
									
										
									
									
									
								
							|  | @ -11127,6 +11127,13 @@ class basic_json | |||
|             return codepoint; | ||||
|         } | ||||
| 
 | ||||
|         static std::string codepoint_to_string(int codepoint) | ||||
|         { | ||||
|             std::stringstream ss; | ||||
|             ss << "U+" << std::setw(4) << std::uppercase << std::setfill('0') << std::hex << codepoint; | ||||
|             return ss.str(); | ||||
|         } | ||||
| 
 | ||||
|         token_type scan_string() | ||||
|         { | ||||
|             // reset yytext (ignore opening quote)
 | ||||
|  | @ -11237,13 +11244,13 @@ class basic_json | |||
|                                         } | ||||
|                                         else | ||||
|                                         { | ||||
|                                             error_message = "invalid string: invalid low surrogate"; | ||||
|                                             error_message = "invalid string: surrogate " + codepoint_to_string(codepoint1) + " must be followed by U+DC00..U+DFFF instead of " + codepoint_to_string(codepoint2); | ||||
|                                             return token_type::parse_error; | ||||
|                                         } | ||||
|                                     } | ||||
|                                     else | ||||
|                                     { | ||||
|                                         error_message = "invalid string: missing low surrogate"; | ||||
|                                         error_message = "invalid string: surrogate " + codepoint_to_string(codepoint1) + " must be followed by U+DC00..U+DFFF"; | ||||
|                                         return token_type::parse_error; | ||||
|                                     } | ||||
|                                 } | ||||
|  | @ -11251,7 +11258,7 @@ class basic_json | |||
|                                 { | ||||
|                                     if (JSON_UNLIKELY(0xDC00 <= codepoint1 and codepoint1 <= 0xDFFF)) | ||||
|                                     { | ||||
|                                         error_message = "invalid string: missing high surrogate"; | ||||
|                                         error_message = "invalid string: surrogate " + codepoint_to_string(codepoint1) + " must follow U+D800..U+DBFF"; | ||||
|                                         return token_type::parse_error; | ||||
|                                     } | ||||
| 
 | ||||
|  | @ -11336,7 +11343,7 @@ class basic_json | |||
|                     case 0x1e: | ||||
|                     case 0x1f: | ||||
|                     { | ||||
|                         error_message = "invalid string: control characters (U+0000 through U+001f) must be escaped"; | ||||
|                         error_message = "invalid string: control character " + codepoint_to_string(current) + " must be escaped"; | ||||
|                         return token_type::parse_error; | ||||
|                     } | ||||
| 
 | ||||
|  | @ -11480,7 +11487,7 @@ class basic_json | |||
|                             continue; | ||||
|                         } | ||||
| 
 | ||||
|                         error_message = "invalid string: not well-formed UTF-8 byte"; | ||||
|                         error_message = "invalid string: ill-formed UTF-8 byte"; | ||||
|                         return token_type::parse_error; | ||||
|                     } | ||||
| 
 | ||||
|  | @ -11500,7 +11507,7 @@ class basic_json | |||
|                             } | ||||
|                         } | ||||
| 
 | ||||
|                         error_message = "invalid string: not well-formed UTF-8 byte"; | ||||
|                         error_message = "invalid string: ill-formed UTF-8 byte"; | ||||
|                         return token_type::parse_error; | ||||
|                     } | ||||
| 
 | ||||
|  | @ -11534,7 +11541,7 @@ class basic_json | |||
|                             } | ||||
|                         } | ||||
| 
 | ||||
|                         error_message = "invalid string: not well-formed UTF-8 byte"; | ||||
|                         error_message = "invalid string: ill-formed UTF-8 byte"; | ||||
|                         return token_type::parse_error; | ||||
|                     } | ||||
| 
 | ||||
|  | @ -11554,7 +11561,7 @@ class basic_json | |||
|                             } | ||||
|                         } | ||||
| 
 | ||||
|                         error_message = "invalid string: not well-formed UTF-8 byte"; | ||||
|                         error_message = "invalid string: ill-formed UTF-8 byte"; | ||||
|                         return token_type::parse_error; | ||||
|                     } | ||||
| 
 | ||||
|  | @ -11579,7 +11586,7 @@ class basic_json | |||
|                             } | ||||
|                         } | ||||
| 
 | ||||
|                         error_message = "invalid string: not well-formed UTF-8 byte"; | ||||
|                         error_message = "invalid string: ill-formed UTF-8 byte"; | ||||
|                         return token_type::parse_error; | ||||
|                     } | ||||
| 
 | ||||
|  | @ -11606,7 +11613,7 @@ class basic_json | |||
|                             } | ||||
|                         } | ||||
| 
 | ||||
|                         error_message = "invalid string: not well-formed UTF-8 byte"; | ||||
|                         error_message = "invalid string: ill-formed UTF-8 byte"; | ||||
|                         return token_type::parse_error; | ||||
|                     } | ||||
| 
 | ||||
|  | @ -11631,14 +11638,14 @@ class basic_json | |||
|                             } | ||||
|                         } | ||||
| 
 | ||||
|                         error_message = "invalid string: not well-formed UTF-8 byte"; | ||||
|                         error_message = "invalid string: ill-formed UTF-8 byte"; | ||||
|                         return token_type::parse_error; | ||||
|                     } | ||||
| 
 | ||||
|                     // remaining bytes (80..C1 and F5..FF) are not well-formed
 | ||||
|                     // remaining bytes (80..C1 and F5..FF) are ill-formed
 | ||||
|                     default: | ||||
|                     { | ||||
|                         error_message = "invalid string: not well-formed UTF-8 byte"; | ||||
|                         error_message = "invalid string: ill-formed UTF-8 byte"; | ||||
|                         return token_type::parse_error; | ||||
|                     } | ||||
|                 } | ||||
|  | @ -11681,7 +11688,7 @@ class basic_json | |||
|             // be changed if minus sign, decimal point or exponent is read
 | ||||
|             token_type number_type = token_type::value_unsigned; | ||||
| 
 | ||||
|             // state: we just found out we need to scan a number
 | ||||
|             // state (init): we just found out we need to scan a number
 | ||||
|             switch (current) | ||||
|             { | ||||
|                 case '-': | ||||
|  | @ -12001,6 +12008,8 @@ scan_number_done: | |||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             // this code is reached if we parse a floating-point number or if
 | ||||
|             // an integer conversion above failed
 | ||||
|             strtof(value_float, yytext.data(), nullptr); | ||||
|             return token_type::value_float; | ||||
|         } | ||||
|  | @ -12064,7 +12073,8 @@ scan_number_done: | |||
|         /// add a character to yytext
 | ||||
|         void add(int c) | ||||
|         { | ||||
|             // resize yytext if necessary
 | ||||
|             // resize yytext if necessary; this condition is deemed unlikely,
 | ||||
|             // because we start with a 1024-byte buffer
 | ||||
|             if (JSON_UNLIKELY((yylen + 1 > yytext.capacity()))) | ||||
|             { | ||||
|                 yytext.resize(2 * yytext.capacity(), '\0'); | ||||
|  | @ -12120,7 +12130,7 @@ scan_number_done: | |||
|             std::string s = ia->read(start_pos, chars_read - start_pos); | ||||
| 
 | ||||
|             // escape control characters
 | ||||
|             std::stringstream ss; | ||||
|             std::string result; | ||||
|             for (auto c : s) | ||||
|             { | ||||
|                 if (c == '\0' or c == std::char_traits<char>::eof()) | ||||
|  | @ -12131,16 +12141,16 @@ scan_number_done: | |||
|                 else if ('\x00' <= c and c <= '\x1f') | ||||
|                 { | ||||
|                     // escape control characters
 | ||||
|                     ss << "<U+" << std::setw(4) << std::setfill('0') << std::hex << int(c) << ">"; | ||||
|                     result += "<" + codepoint_to_string(c) + ">"; | ||||
|                 } | ||||
|                 else | ||||
|                 { | ||||
|                     // add character as is
 | ||||
|                     ss << c; | ||||
|                     result.append(1, c); | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             return ss.str(); | ||||
|             return result; | ||||
|         } | ||||
| 
 | ||||
|         /// return syntax error message
 | ||||
|  | @ -12204,7 +12214,8 @@ scan_number_done: | |||
|                 case '9': | ||||
|                     return scan_number(); | ||||
| 
 | ||||
|                 // end of input
 | ||||
|                 // end of input (the null byte is needed when parsing from
 | ||||
|                 // string literals)
 | ||||
|                 case '\0': | ||||
|                 case std::char_traits<char>::eof(): | ||||
|                     return token_type::end_of_input; | ||||
|  |  | |||
|  | @ -98,18 +98,18 @@ TEST_CASE("parser class") | |||
|                 // error: tab in string
 | ||||
|                 CHECK_THROWS_AS(parse_string("\"\t\"").parse(), json::parse_error); | ||||
|                 CHECK_THROWS_WITH(parse_string("\"\t\"").parse(), | ||||
|                                   "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"<U+0009>'"); | ||||
|                                   "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character U+0009 must be escaped; last read '\"<U+0009>'"); | ||||
|                 // error: newline in string
 | ||||
|                 CHECK_THROWS_AS(parse_string("\"\n\"").parse(), json::parse_error); | ||||
|                 CHECK_THROWS_AS(parse_string("\"\r\"").parse(), json::parse_error); | ||||
|                 CHECK_THROWS_WITH(parse_string("\"\n\"").parse(), | ||||
|                                   "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"<U+000a>'"); | ||||
|                                   "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character U+000A must be escaped; last read '\"<U+000A>'"); | ||||
|                 CHECK_THROWS_WITH(parse_string("\"\r\"").parse(), | ||||
|                                   "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"<U+000d>'"); | ||||
|                                   "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character U+000D must be escaped; last read '\"<U+000D>'"); | ||||
|                 // error: backspace in string
 | ||||
|                 CHECK_THROWS_AS(parse_string("\"\b\"").parse(), json::parse_error); | ||||
|                 CHECK_THROWS_WITH(parse_string("\"\b\"").parse(), | ||||
|                                   "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control characters (U+0000 through U+001f) must be escaped; last read '\"<U+0008>'"); | ||||
|                                   "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character U+0008 must be escaped; last read '\"<U+0008>'"); | ||||
|                 // improve code coverage
 | ||||
|                 CHECK_THROWS_AS(parse_string("\uFF01").parse(), json::parse_error); | ||||
|                 CHECK_THROWS_AS(parse_string("[-4:1,]").parse(), json::parse_error); | ||||
|  | @ -648,17 +648,17 @@ TEST_CASE("parser class") | |||
|         // missing part of a surrogate pair
 | ||||
|         CHECK_THROWS_AS(json::parse("\"\\uD80C\""), json::parse_error); | ||||
|         CHECK_THROWS_WITH(json::parse("\"\\uD80C\""), | ||||
|                           "[json.exception.parse_error.101] parse error at 8: syntax error - invalid string: missing low surrogate; last read '\"\\uD80C\"'"); | ||||
|                           "[json.exception.parse_error.101] parse error at 8: syntax error - invalid string: surrogate U+D80C must be followed by U+DC00..U+DFFF; last read '\"\\uD80C\"'"); | ||||
|         // invalid surrogate pair
 | ||||
|         CHECK_THROWS_AS(json::parse("\"\\uD80C\\uD80C\""), json::parse_error); | ||||
|         CHECK_THROWS_AS(json::parse("\"\\uD80C\\u0000\""), json::parse_error); | ||||
|         CHECK_THROWS_AS(json::parse("\"\\uD80C\\uFFFF\""), json::parse_error); | ||||
|         CHECK_THROWS_WITH(json::parse("\"\\uD80C\\uD80C\""), | ||||
|                           "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\uD80C'"); | ||||
|                           "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: surrogate U+D80C must be followed by U+DC00..U+DFFF instead of U+D80C; last read '\"\\uD80C\\uD80C'"); | ||||
|         CHECK_THROWS_WITH(json::parse("\"\\uD80C\\u0000\""), | ||||
|                           "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\u0000'"); | ||||
|                           "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: surrogate U+D80C must be followed by U+DC00..U+DFFF instead of U+0000; last read '\"\\uD80C\\u0000'"); | ||||
|         CHECK_THROWS_WITH(json::parse("\"\\uD80C\\uFFFF\""), | ||||
|                           "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: invalid low surrogate; last read '\"\\uD80C\\uFFFF'"); | ||||
|                           "[json.exception.parse_error.101] parse error at 13: syntax error - invalid string: surrogate U+D80C must be followed by U+DC00..U+DFFF instead of U+FFFF; last read '\"\\uD80C\\uFFFF'"); | ||||
|     } | ||||
| 
 | ||||
|     SECTION("tests found by mutate++") | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue