2016-08-04 19:55:47 +00:00
/*
__ _____ _____ _____
__ | | __ | | | | JSON for Modern C + + ( test suite )
2017-12-29 17:31:13 +00:00
| | | __ | | | | | | version 3.0 .1
2016-08-04 19:55:47 +00:00
| _____ | _____ | _____ | _ | ___ | https : //github.com/nlohmann/json
Licensed under the MIT License < http : //opensource.org/licenses/MIT>.
2017-01-02 08:40:00 +00:00
Copyright ( c ) 2013 - 2017 Niels Lohmann < http : //nlohmann.me>.
2016-08-04 19:55:47 +00:00
Permission is hereby granted , free of charge , to any person obtaining a copy
of this software and associated documentation files ( the " Software " ) , to deal
in the Software without restriction , including without limitation the rights
to use , copy , modify , merge , publish , distribute , sublicense , and / or sell
copies of the Software , and to permit persons to whom the Software is
furnished to do so , subject to the following conditions :
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software .
THE SOFTWARE IS PROVIDED " AS IS " , WITHOUT WARRANTY OF ANY KIND , EXPRESS OR
IMPLIED , INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY ,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT . IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM , DAMAGES OR OTHER
LIABILITY , WHETHER IN AN ACTION OF CONTRACT , TORT OR OTHERWISE , ARISING FROM ,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE .
*/
# include "catch.hpp"
# define private public
# include "json.hpp"
using nlohmann : : json ;
# include <fstream>
2017-07-09 06:04:49 +00:00
# include <iostream>
2016-08-04 19:55:47 +00:00
2017-07-08 10:03:12 +00:00
// Counter of check_utf8string() invocations; incremented on every call and
// used there to print a progress line every 100000 calls.
// NOTE(review): the extern declaration directly before the definition is
// presumably there to satisfy a missing-declaration warning - confirm.
extern size_t calls ;
2017-06-21 16:50:11 +00:00
size_t calls = 0 ;
2017-05-15 19:07:58 +00:00
// Forward declaration without default arguments; the defaults for
// byte2..byte4 are supplied on the definition that follows.
void check_utf8string ( bool success_expected , int byte1 , int byte2 , int byte3 , int byte4 ) ;
2017-04-23 18:32:05 +00:00
// create and check a JSON string with up to four UTF-8 bytes
void check_utf8string ( bool success_expected , int byte1 , int byte2 = - 1 , int byte3 = - 1 , int byte4 = - 1 )
2017-04-23 16:40:17 +00:00
{
2017-06-21 16:50:11 +00:00
if ( + + calls % 100000 = = 0 )
{
2017-07-22 11:35:04 +00:00
std : : cout < < calls < < " of 8860608 UTF-8 strings checked " < < std : : endl ;
2017-06-21 16:50:11 +00:00
}
2017-04-23 18:32:05 +00:00
std : : string json_string = " \" " ;
CAPTURE ( byte1 ) ;
json_string + = std : : string ( 1 , static_cast < char > ( byte1 ) ) ;
2017-04-23 16:40:17 +00:00
if ( byte2 ! = - 1 )
{
2017-04-23 18:32:05 +00:00
CAPTURE ( byte2 ) ;
json_string + = std : : string ( 1 , static_cast < char > ( byte2 ) ) ;
2017-04-23 16:40:17 +00:00
}
2017-04-23 18:32:05 +00:00
2017-04-23 16:40:17 +00:00
if ( byte3 ! = - 1 )
{
2017-04-23 18:32:05 +00:00
CAPTURE ( byte3 ) ;
json_string + = std : : string ( 1 , static_cast < char > ( byte3 ) ) ;
2017-04-23 16:40:17 +00:00
}
2017-04-23 18:32:05 +00:00
2017-04-23 16:40:17 +00:00
if ( byte4 ! = - 1 )
{
2017-04-23 18:32:05 +00:00
CAPTURE ( byte4 ) ;
json_string + = std : : string ( 1 , static_cast < char > ( byte4 ) ) ;
}
json_string + = " \" " ;
CAPTURE ( json_string ) ;
if ( success_expected )
{
CHECK_NOTHROW ( json : : parse ( json_string ) ) ;
}
else
{
2017-07-07 20:41:22 +00:00
CHECK_THROWS_AS ( json : : parse ( json_string ) , json : : parse_error & ) ;
2017-04-23 16:40:17 +00:00
}
}
2017-04-23 20:54:21 +00:00
TEST_CASE ( " Unicode " , " [hide] " )
2017-04-23 13:12:50 +00:00
{
2017-04-23 20:54:21 +00:00
SECTION ( " RFC 3629 " )
2017-04-23 13:12:50 +00:00
{
2017-04-23 20:54:21 +00:00
/*
RFC 3629 describes in Sect . 4 the syntax of UTF - 8 byte sequences as
follows :
A UTF - 8 string is a sequence of octets representing a sequence of UCS
characters . An octet sequence is valid UTF - 8 only if it matches the
following syntax , which is derived from the rules for encoding UTF - 8
and is expressed in the ABNF of [ RFC2234 ] .
UTF8 - octets = * ( UTF8 - char )
UTF8 - char = UTF8 - 1 / UTF8 - 2 / UTF8 - 3 / UTF8 - 4
UTF8 - 1 = % x00 - 7F
UTF8 - 2 = % xC2 - DF UTF8 - tail
UTF8 - 3 = % xE0 % xA0 - BF UTF8 - tail / % xE1 - EC 2 ( UTF8 - tail ) /
% xED % x80 - 9F UTF8 - tail / % xEE - EF 2 ( UTF8 - tail )
UTF8 - 4 = % xF0 % x90 - BF 2 ( UTF8 - tail ) / % xF1 - F3 3 ( UTF8 - tail ) /
% xF4 % x80 - 8F 2 ( UTF8 - tail )
UTF8 - tail = % x80 - BF
*/
SECTION(" ill-formed first byte ")
{
    // 0x80..0xC1 is not a permitted first octet in any UTF8-1..UTF8-4
    // production, so a string consisting of just that byte must be rejected
    int first = 0x80;
    while (first <= 0xC1)
    {
        check_utf8string(false, first);
        ++first;
    }

    // the same holds for 0xF5..0xFF
    first = 0xF5;
    while (first <= 0xFF)
    {
        check_utf8string(false, first);
        ++first;
    }
}
2017-04-23 20:54:21 +00:00
SECTION(" UTF8-1 (x00-x7F) ")
{
    SECTION(" well-formed ")
    {
        for (int ch = 0x00; ch <= 0x7F; ++ch)
        {
            // Every single-byte value is valid UTF-8, but three groups are
            // still parse errors when placed alone inside a JSON string:
            //   0x00..0x1F  unescaped control characters
            //   0x22        a lone double quote
            //   0x5C        a lone backslash
            const bool is_control = (0x00 <= ch and ch <= 0x1F);
            const bool is_quote = (ch == 0x22);
            const bool is_backslash = (ch == 0x5C);

            // everything else must parse
            const bool accepted = not (is_control or is_quote or is_backslash);
            check_utf8string(accepted, ch);
        }
    }
}
2017-04-23 20:54:21 +00:00
SECTION(" UTF8-2 (xC2-xDF UTF8-tail) ")
{
    // byte ranges of this production
    const int lead_lo = 0xC2;
    const int lead_hi = 0xDF;
    const int tail_lo = 0x80;
    const int tail_hi = 0xBF;

    SECTION(" well-formed ")
    {
        // every lead byte combined with every valid tail byte
        for (int lead = lead_lo; lead <= lead_hi; ++lead)
        {
            for (int second = tail_lo; second <= tail_hi; ++second)
            {
                check_utf8string(true, lead, second);
            }
        }
    }

    SECTION(" ill-formed: missing second byte ")
    {
        // lead byte on its own
        for (int lead = lead_lo; lead <= lead_hi; ++lead)
        {
            check_utf8string(false, lead);
        }
    }

    SECTION(" ill-formed: wrong second byte ")
    {
        for (int lead = lead_lo; lead <= lead_hi; ++lead)
        {
            for (int second = 0x00; second <= 0xFF; ++second)
            {
                // only second bytes outside the tail range are of interest
                if (second < tail_lo or second > tail_hi)
                {
                    check_utf8string(false, lead, second);
                }
            }
        }
    }
}
2017-04-23 20:54:21 +00:00
SECTION(" UTF8-3 (xE0 xA0-BF UTF8-tail) ")
{
    // byte ranges of this production; the lead byte is fixed
    const int lead = 0xE0;
    const int second_lo = 0xA0;
    const int second_hi = 0xBF;
    const int tail_lo = 0x80;
    const int tail_hi = 0xBF;

    SECTION(" well-formed ")
    {
        for (int second = second_lo; second <= second_hi; ++second)
        {
            for (int third = tail_lo; third <= tail_hi; ++third)
            {
                check_utf8string(true, lead, second, third);
            }
        }
    }

    SECTION(" ill-formed: missing second byte ")
    {
        // lead byte on its own
        check_utf8string(false, lead);
    }

    SECTION(" ill-formed: missing third byte ")
    {
        // valid lead and second byte, nothing after them
        for (int second = second_lo; second <= second_hi; ++second)
        {
            check_utf8string(false, lead, second);
        }
    }

    SECTION(" ill-formed: wrong second byte ")
    {
        for (int second = 0x00; second <= 0xFF; ++second)
        {
            // only second bytes outside the allowed range are of interest
            if (second < second_lo or second > second_hi)
            {
                for (int third = tail_lo; third <= tail_hi; ++third)
                {
                    check_utf8string(false, lead, second, third);
                }
            }
        }
    }

    SECTION(" ill-formed: wrong third byte ")
    {
        for (int second = second_lo; second <= second_hi; ++second)
        {
            for (int third = 0x00; third <= 0xFF; ++third)
            {
                // only third bytes outside the tail range are of interest
                if (third < tail_lo or third > tail_hi)
                {
                    check_utf8string(false, lead, second, third);
                }
            }
        }
    }
}
2017-04-23 20:54:21 +00:00
SECTION(" UTF8-3 (xE1-xEC UTF8-tail UTF8-tail) ")
{
    // byte ranges of this production; second and third byte are plain tails
    const int lead_lo = 0xE1;
    const int lead_hi = 0xEC;
    const int tail_lo = 0x80;
    const int tail_hi = 0xBF;

    SECTION(" well-formed ")
    {
        for (int lead = lead_lo; lead <= lead_hi; ++lead)
        {
            for (int second = tail_lo; second <= tail_hi; ++second)
            {
                for (int third = tail_lo; third <= tail_hi; ++third)
                {
                    check_utf8string(true, lead, second, third);
                }
            }
        }
    }

    SECTION(" ill-formed: missing second byte ")
    {
        // lead byte on its own
        for (int lead = lead_lo; lead <= lead_hi; ++lead)
        {
            check_utf8string(false, lead);
        }
    }

    SECTION(" ill-formed: missing third byte ")
    {
        // valid lead and second byte, nothing after them
        for (int lead = lead_lo; lead <= lead_hi; ++lead)
        {
            for (int second = tail_lo; second <= tail_hi; ++second)
            {
                check_utf8string(false, lead, second);
            }
        }
    }

    SECTION(" ill-formed: wrong second byte ")
    {
        for (int lead = lead_lo; lead <= lead_hi; ++lead)
        {
            for (int second = 0x00; second <= 0xFF; ++second)
            {
                // only second bytes outside the tail range are of interest
                if (second < tail_lo or second > tail_hi)
                {
                    for (int third = tail_lo; third <= tail_hi; ++third)
                    {
                        check_utf8string(false, lead, second, third);
                    }
                }
            }
        }
    }

    SECTION(" ill-formed: wrong third byte ")
    {
        for (int lead = lead_lo; lead <= lead_hi; ++lead)
        {
            for (int second = tail_lo; second <= tail_hi; ++second)
            {
                for (int third = 0x00; third <= 0xFF; ++third)
                {
                    // only third bytes outside the tail range are of interest
                    if (third < tail_lo or third > tail_hi)
                    {
                        check_utf8string(false, lead, second, third);
                    }
                }
            }
        }
    }
}
2017-04-23 20:54:21 +00:00
SECTION(" UTF8-3 (xED x80-9F UTF8-tail) ")
{
    // byte ranges of this production; the lead byte is fixed and the second
    // byte is limited to 0x80..0x9F
    const int lead = 0xED;
    const int second_lo = 0x80;
    const int second_hi = 0x9F;
    const int tail_lo = 0x80;
    const int tail_hi = 0xBF;

    SECTION(" well-formed ")
    {
        for (int second = second_lo; second <= second_hi; ++second)
        {
            for (int third = tail_lo; third <= tail_hi; ++third)
            {
                check_utf8string(true, lead, second, third);
            }
        }
    }

    SECTION(" ill-formed: missing second byte ")
    {
        // lead byte on its own
        check_utf8string(false, lead);
    }

    SECTION(" ill-formed: missing third byte ")
    {
        // valid lead and second byte, nothing after them
        for (int second = second_lo; second <= second_hi; ++second)
        {
            check_utf8string(false, lead, second);
        }
    }

    SECTION(" ill-formed: wrong second byte ")
    {
        for (int second = 0x00; second <= 0xFF; ++second)
        {
            // only second bytes outside the allowed range are of interest
            if (second < second_lo or second > second_hi)
            {
                for (int third = tail_lo; third <= tail_hi; ++third)
                {
                    check_utf8string(false, lead, second, third);
                }
            }
        }
    }

    SECTION(" ill-formed: wrong third byte ")
    {
        for (int second = second_lo; second <= second_hi; ++second)
        {
            for (int third = 0x00; third <= 0xFF; ++third)
            {
                // only third bytes outside the tail range are of interest
                if (third < tail_lo or third > tail_hi)
                {
                    check_utf8string(false, lead, second, third);
                }
            }
        }
    }
}
2017-04-23 20:54:21 +00:00
SECTION(" UTF8-3 (xEE-xEF UTF8-tail UTF8-tail) ")
{
    // byte ranges of this production; second and third byte are plain tails
    const int lead_lo = 0xEE;
    const int lead_hi = 0xEF;
    const int tail_lo = 0x80;
    const int tail_hi = 0xBF;

    SECTION(" well-formed ")
    {
        for (int lead = lead_lo; lead <= lead_hi; ++lead)
        {
            for (int second = tail_lo; second <= tail_hi; ++second)
            {
                for (int third = tail_lo; third <= tail_hi; ++third)
                {
                    check_utf8string(true, lead, second, third);
                }
            }
        }
    }

    SECTION(" ill-formed: missing second byte ")
    {
        // lead byte on its own
        for (int lead = lead_lo; lead <= lead_hi; ++lead)
        {
            check_utf8string(false, lead);
        }
    }

    SECTION(" ill-formed: missing third byte ")
    {
        // valid lead and second byte, nothing after them
        for (int lead = lead_lo; lead <= lead_hi; ++lead)
        {
            for (int second = tail_lo; second <= tail_hi; ++second)
            {
                check_utf8string(false, lead, second);
            }
        }
    }

    SECTION(" ill-formed: wrong second byte ")
    {
        for (int lead = lead_lo; lead <= lead_hi; ++lead)
        {
            for (int second = 0x00; second <= 0xFF; ++second)
            {
                // only second bytes outside the tail range are of interest
                if (second < tail_lo or second > tail_hi)
                {
                    for (int third = tail_lo; third <= tail_hi; ++third)
                    {
                        check_utf8string(false, lead, second, third);
                    }
                }
            }
        }
    }

    SECTION(" ill-formed: wrong third byte ")
    {
        for (int lead = lead_lo; lead <= lead_hi; ++lead)
        {
            for (int second = tail_lo; second <= tail_hi; ++second)
            {
                for (int third = 0x00; third <= 0xFF; ++third)
                {
                    // only third bytes outside the tail range are of interest
                    if (third < tail_lo or third > tail_hi)
                    {
                        check_utf8string(false, lead, second, third);
                    }
                }
            }
        }
    }
}
2017-04-23 20:54:21 +00:00
SECTION(" UTF8-4 (xF0 x90-BF UTF8-tail UTF8-tail) ")
{
    // Four-byte sequences led by 0xF0: the second byte must be 0x90..0xBF,
    // the third and fourth bytes must be 0x80..0xBF. Each sub-section either
    // uses only valid bytes, cuts the sequence short, or puts exactly one
    // byte outside its allowed range.
    const int byte1 = 0xF0;
    const int second_lo = 0x90;
    const int second_hi = 0xBF;
    const int tail_lo = 0x80;
    const int tail_hi = 0xBF;

    SECTION(" well-formed ")
    {
        for (int byte2 = second_lo; byte2 <= second_hi; ++byte2)
        {
            for (int byte3 = tail_lo; byte3 <= tail_hi; ++byte3)
            {
                for (int byte4 = tail_lo; byte4 <= tail_hi; ++byte4)
                {
                    check_utf8string(true, byte1, byte2, byte3, byte4);
                }
            }
        }
    }

    SECTION(" ill-formed: missing second byte ")
    {
        // lead byte on its own
        check_utf8string(false, byte1);
    }

    SECTION(" ill-formed: missing third byte ")
    {
        for (int byte2 = second_lo; byte2 <= second_hi; ++byte2)
        {
            check_utf8string(false, byte1, byte2);
        }
    }

    SECTION(" ill-formed: missing fourth byte ")
    {
        for (int byte2 = second_lo; byte2 <= second_hi; ++byte2)
        {
            for (int byte3 = tail_lo; byte3 <= tail_hi; ++byte3)
            {
                check_utf8string(false, byte1, byte2, byte3);
            }
        }
    }

    SECTION(" ill-formed: wrong second byte ")
    {
        for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
        {
            // skip correct second byte
            if (second_lo <= byte2 and byte2 <= second_hi)
            {
                continue;
            }

            for (int byte3 = tail_lo; byte3 <= tail_hi; ++byte3)
            {
                for (int byte4 = tail_lo; byte4 <= tail_hi; ++byte4)
                {
                    check_utf8string(false, byte1, byte2, byte3, byte4);
                }
            }
        }
    }

    SECTION(" ill-formed: wrong third byte ")
    {
        for (int byte2 = second_lo; byte2 <= second_hi; ++byte2)
        {
            for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
            {
                // skip correct third byte
                if (tail_lo <= byte3 and byte3 <= tail_hi)
                {
                    continue;
                }

                for (int byte4 = tail_lo; byte4 <= tail_hi; ++byte4)
                {
                    check_utf8string(false, byte1, byte2, byte3, byte4);
                }
            }
        }
    }

    SECTION(" ill-formed: wrong fourth byte ")
    {
        for (int byte2 = second_lo; byte2 <= second_hi; ++byte2)
        {
            for (int byte3 = tail_lo; byte3 <= tail_hi; ++byte3)
            {
                for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4)
                {
                    // Skip correct fourth byte. The filter has to look at
                    // byte4: filtering on byte3 (always a valid tail in this
                    // loop) would skip every iteration and test nothing.
                    // Note that these calls come on top of the total that
                    // check_utf8string prints in its progress message.
                    if (tail_lo <= byte4 and byte4 <= tail_hi)
                    {
                        continue;
                    }

                    check_utf8string(false, byte1, byte2, byte3, byte4);
                }
            }
        }
    }
}
2017-04-23 20:54:21 +00:00
SECTION(" UTF8-4 (xF1-F3 UTF8-tail UTF8-tail UTF8-tail) ")
{
    // Four-byte sequences led by 0xF1..0xF3: all three following bytes must
    // be 0x80..0xBF. Each sub-section either uses only valid bytes, cuts the
    // sequence short, or puts exactly one byte outside its allowed range.
    const int lead_lo = 0xF1;
    const int lead_hi = 0xF3;
    const int tail_lo = 0x80;
    const int tail_hi = 0xBF;

    SECTION(" well-formed ")
    {
        for (int byte1 = lead_lo; byte1 <= lead_hi; ++byte1)
        {
            for (int byte2 = tail_lo; byte2 <= tail_hi; ++byte2)
            {
                for (int byte3 = tail_lo; byte3 <= tail_hi; ++byte3)
                {
                    for (int byte4 = tail_lo; byte4 <= tail_hi; ++byte4)
                    {
                        check_utf8string(true, byte1, byte2, byte3, byte4);
                    }
                }
            }
        }
    }

    SECTION(" ill-formed: missing second byte ")
    {
        for (int byte1 = lead_lo; byte1 <= lead_hi; ++byte1)
        {
            check_utf8string(false, byte1);
        }
    }

    SECTION(" ill-formed: missing third byte ")
    {
        for (int byte1 = lead_lo; byte1 <= lead_hi; ++byte1)
        {
            for (int byte2 = tail_lo; byte2 <= tail_hi; ++byte2)
            {
                check_utf8string(false, byte1, byte2);
            }
        }
    }

    SECTION(" ill-formed: missing fourth byte ")
    {
        for (int byte1 = lead_lo; byte1 <= lead_hi; ++byte1)
        {
            for (int byte2 = tail_lo; byte2 <= tail_hi; ++byte2)
            {
                for (int byte3 = tail_lo; byte3 <= tail_hi; ++byte3)
                {
                    check_utf8string(false, byte1, byte2, byte3);
                }
            }
        }
    }

    SECTION(" ill-formed: wrong second byte ")
    {
        for (int byte1 = lead_lo; byte1 <= lead_hi; ++byte1)
        {
            for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
            {
                // skip correct second byte
                if (tail_lo <= byte2 and byte2 <= tail_hi)
                {
                    continue;
                }

                for (int byte3 = tail_lo; byte3 <= tail_hi; ++byte3)
                {
                    for (int byte4 = tail_lo; byte4 <= tail_hi; ++byte4)
                    {
                        check_utf8string(false, byte1, byte2, byte3, byte4);
                    }
                }
            }
        }
    }

    SECTION(" ill-formed: wrong third byte ")
    {
        for (int byte1 = lead_lo; byte1 <= lead_hi; ++byte1)
        {
            for (int byte2 = tail_lo; byte2 <= tail_hi; ++byte2)
            {
                for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
                {
                    // skip correct third byte
                    if (tail_lo <= byte3 and byte3 <= tail_hi)
                    {
                        continue;
                    }

                    for (int byte4 = tail_lo; byte4 <= tail_hi; ++byte4)
                    {
                        check_utf8string(false, byte1, byte2, byte3, byte4);
                    }
                }
            }
        }
    }

    SECTION(" ill-formed: wrong fourth byte ")
    {
        for (int byte1 = lead_lo; byte1 <= lead_hi; ++byte1)
        {
            for (int byte2 = tail_lo; byte2 <= tail_hi; ++byte2)
            {
                for (int byte3 = tail_lo; byte3 <= tail_hi; ++byte3)
                {
                    for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4)
                    {
                        // Skip correct fourth byte. The filter has to look
                        // at byte4: filtering on byte3 (always a valid tail
                        // in this loop) would skip every iteration and test
                        // nothing. Note that these calls come on top of the
                        // total that check_utf8string prints in its progress
                        // message.
                        if (tail_lo <= byte4 and byte4 <= tail_hi)
                        {
                            continue;
                        }

                        check_utf8string(false, byte1, byte2, byte3, byte4);
                    }
                }
            }
        }
    }
}
2017-04-23 20:54:21 +00:00
SECTION(" UTF8-4 (xF4 x80-8F UTF8-tail UTF8-tail) ")
{
    // Four-byte sequences led by 0xF4: the second byte must be 0x80..0x8F,
    // the third and fourth bytes must be 0x80..0xBF. Each sub-section either
    // uses only valid bytes, cuts the sequence short, or puts exactly one
    // byte outside its allowed range.
    const int byte1 = 0xF4;
    const int second_lo = 0x80;
    const int second_hi = 0x8F;
    const int tail_lo = 0x80;
    const int tail_hi = 0xBF;

    SECTION(" well-formed ")
    {
        for (int byte2 = second_lo; byte2 <= second_hi; ++byte2)
        {
            for (int byte3 = tail_lo; byte3 <= tail_hi; ++byte3)
            {
                for (int byte4 = tail_lo; byte4 <= tail_hi; ++byte4)
                {
                    check_utf8string(true, byte1, byte2, byte3, byte4);
                }
            }
        }
    }

    SECTION(" ill-formed: missing second byte ")
    {
        // lead byte on its own
        check_utf8string(false, byte1);
    }

    SECTION(" ill-formed: missing third byte ")
    {
        for (int byte2 = second_lo; byte2 <= second_hi; ++byte2)
        {
            check_utf8string(false, byte1, byte2);
        }
    }

    SECTION(" ill-formed: missing fourth byte ")
    {
        for (int byte2 = second_lo; byte2 <= second_hi; ++byte2)
        {
            for (int byte3 = tail_lo; byte3 <= tail_hi; ++byte3)
            {
                check_utf8string(false, byte1, byte2, byte3);
            }
        }
    }

    SECTION(" ill-formed: wrong second byte ")
    {
        for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
        {
            // skip correct second byte
            if (second_lo <= byte2 and byte2 <= second_hi)
            {
                continue;
            }

            for (int byte3 = tail_lo; byte3 <= tail_hi; ++byte3)
            {
                for (int byte4 = tail_lo; byte4 <= tail_hi; ++byte4)
                {
                    check_utf8string(false, byte1, byte2, byte3, byte4);
                }
            }
        }
    }

    SECTION(" ill-formed: wrong third byte ")
    {
        for (int byte2 = second_lo; byte2 <= second_hi; ++byte2)
        {
            for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
            {
                // skip correct third byte
                if (tail_lo <= byte3 and byte3 <= tail_hi)
                {
                    continue;
                }

                for (int byte4 = tail_lo; byte4 <= tail_hi; ++byte4)
                {
                    check_utf8string(false, byte1, byte2, byte3, byte4);
                }
            }
        }
    }

    SECTION(" ill-formed: wrong fourth byte ")
    {
        for (int byte2 = second_lo; byte2 <= second_hi; ++byte2)
        {
            for (int byte3 = tail_lo; byte3 <= tail_hi; ++byte3)
            {
                for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4)
                {
                    // Skip correct fourth byte. The filter has to look at
                    // byte4: filtering on byte3 (always a valid tail in this
                    // loop) would skip every iteration and test nothing.
                    // Note that these calls come on top of the total that
                    // check_utf8string prints in its progress message.
                    if (tail_lo <= byte4 and byte4 <= tail_hi)
                    {
                        continue;
                    }

                    check_utf8string(false, byte1, byte2, byte3, byte4);
                }
            }
        }
    }
}
}
2017-04-23 20:54:21 +00:00
SECTION ( " \\ uxxxx sequences " )
2016-08-04 19:55:47 +00:00
{
// create an escaped string from a code point
//
// Takes a value cp and returns the text produced by streaming a prefix
// literal followed by cp formatted as hexadecimal, padded to a minimum width
// of 4 with the fill character. std::hex without std::uppercase yields
// lowercase digits.
// NOTE(review): std::stringstream, std::setw and std::setfill need <sstream>
// and <iomanip>; the visible include list has neither, so they are presumably
// pulled in through json.hpp - confirm.
// NOTE(review): the string and character literals below appear with extra
// spaces in this view (a character literal with three characters is not a
// plain char) - presumably an extraction artifact; verify against the
// repository before relying on the exact text.
const auto codepoint_to_unicode = [ ] ( std : : size_t cp )
{
2017-04-23 20:54:21 +00:00
// code points are represented as a six-character sequence: a
2016-08-04 19:55:47 +00:00
// reverse solidus, followed by the lowercase letter u, followed
// by four hexadecimal digits that encode the character's code
// point
std : : stringstream ss ;
// setw applies only to the next inserted value, i.e. to cp
ss < < " \\ u " < < std : : setw ( 4 ) < < std : : setfill ( ' 0 ' ) < < std : : hex < < cp ;
return ss . str ( ) ;
} ;
2017-04-23 20:54:21 +00:00
SECTION(" correct sequences ")
{
    // Walk over every Unicode code point 0x0000..0x10FFFF. That range holds
    // 0x110000 (1114112) values; the 2048 surrogate values 0xD800..0xDFFF are
    // left out, so 1112064 strings are parsed in total.
    for (std::size_t code_point = 0; code_point <= 0x10FFFFu; ++code_point)
    {
        // Surrogate values are reserved for UTF-16 and never denote a
        // character on their own; escaping one alone would make the parser
        // report a missing low surrogate, so they are not generated.
        const bool is_surrogate = (0xD800u <= code_point and code_point <= 0xDFFFu);
        if (is_surrogate)
        {
            continue;
        }

        // the escaped form of the code point: one or two \uxxxx sequences
        std::string escaped;
        if (code_point < 0x10000u)
        {
            // Basic Multilingual Plane: a single \uxxxx sequence is enough
            escaped = codepoint_to_unicode(code_point);
        }
        else
        {
            // Beyond the BMP: a 12-character sequence encoding the UTF-16
            // surrogate pair. The upper ten bits of the offset select the
            // high surrogate, the lower ten bits the low surrogate.
            const std::size_t offset = code_point - 0x10000u;
            const auto high_surrogate = 0xd800u + ((offset >> 10) & 0x3ffu);
            const auto low_surrogate = 0xdc00u + (offset & 0x3ffu);
            escaped = codepoint_to_unicode(high_surrogate) + codepoint_to_unicode(low_surrogate);
        }

        // wrap the escape sequence in string delimiters and parse it
        std::string json_text = " \" ";
        json_text += escaped;
        json_text += " \" ";
        CAPTURE(json_text);
        CHECK_NOTHROW(json::parse(json_text));
    }
}
2017-09-13 16:56:54 +00:00
// Hand-picked \u escape sequences with invalid surrogate usage. Every input
// is checked twice: once for the exception type (json::parse_error) and once
// for the exact exception message.
// NOTE(review): the literals in this view contain extra spaces around the
// escapes; the positions in the expected messages (e.g. "at 7") suggest the
// real inputs have none - presumably an extraction artifact, confirm against
// the repository.
// NOTE(review): several expected messages name U+DC00..U+DFFF as the
// surrogate that "must be followed by U+DC00..U+DFFF" although the inputs
// start with \uD800; the text has to match whatever the library emits, so it
// is kept as is - confirm against the library.
SECTION ( " incorrect sequences " )
{
SECTION ( " incorrect surrogate values " )
{
// a low surrogate at the start of the string
CHECK_THROWS_AS ( json : : parse ( " \" \\ uDC00 \\ uDC00 \" " ) , json : : parse_error & ) ;
CHECK_THROWS_WITH ( json : : parse ( " \" \\ uDC00 \\ uDC00 \" " ) ,
" [json.exception.parse_error.101] parse error at 7: syntax error - invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF; last read: ' \" \\ uDC00' " ) ;
// a low surrogate after \uD7FF, which lies just below the high-surrogate range
CHECK_THROWS_AS ( json : : parse ( " \" \\ uD7FF \\ uDC00 \" " ) , json : : parse_error & ) ;
CHECK_THROWS_WITH ( json : : parse ( " \" \\ uD7FF \\ uDC00 \" " ) ,
" [json.exception.parse_error.101] parse error at 13: syntax error - invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF; last read: ' \" \\ uD7FF \\ uDC00' " ) ;
// a high surrogate followed by an ordinary character
CHECK_THROWS_AS ( json : : parse ( " \" \\ uD800] \" " ) , json : : parse_error & ) ;
CHECK_THROWS_WITH ( json : : parse ( " \" \\ uD800] \" " ) ,
" [json.exception.parse_error.101] parse error at 8: syntax error - invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF; last read: ' \" \\ uD800]' " ) ;
// a high surrogate followed by a backslash escape that is not \u
CHECK_THROWS_AS ( json : : parse ( " \" \\ uD800 \\ v \" " ) , json : : parse_error & ) ;
CHECK_THROWS_WITH ( json : : parse ( " \" \\ uD800 \\ v \" " ) ,
" [json.exception.parse_error.101] parse error at 9: syntax error - invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF; last read: ' \" \\ uD800 \\ v' " ) ;
// a high surrogate followed by a \u escape with only three hex digits
CHECK_THROWS_AS ( json : : parse ( " \" \\ uD800 \\ u123 \" " ) , json : : parse_error & ) ;
CHECK_THROWS_WITH ( json : : parse ( " \" \\ uD800 \\ u123 \" " ) ,
" [json.exception.parse_error.101] parse error at 13: syntax error - invalid string: ' \\ u' must be followed by 4 hex digits; last read: ' \" \\ uD800 \\ u123 \" ' " ) ;
// a high surrogate followed by \uDBFF, one below the low-surrogate range
CHECK_THROWS_AS ( json : : parse ( " \" \\ uD800 \\ uDBFF \" " ) , json : : parse_error & ) ;
CHECK_THROWS_WITH ( json : : parse ( " \" \\ uD800 \\ uDBFF \" " ) ,
" [json.exception.parse_error.101] parse error at 13: syntax error - invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF; last read: ' \" \\ uD800 \\ uDBFF' " ) ;
// a high surrogate followed by \uE000, one above the low-surrogate range
CHECK_THROWS_AS ( json : : parse ( " \" \\ uD800 \\ uE000 \" " ) , json : : parse_error & ) ;
CHECK_THROWS_WITH ( json : : parse ( " \" \\ uD800 \\ uE000 \" " ) ,
" [json.exception.parse_error.101] parse error at 13: syntax error - invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF; last read: ' \" \\ uD800 \\ uE000' " ) ;
}
}
2017-04-24 13:07:43 +00:00
#if 0
2017-04-23 20:54:21 +00:00
// Ill-formed surrogate usage inside JSON strings: every text generated below
// is expected to make json::parse throw json::parse_error.
// This section sits inside the surrounding `#if 0` and is therefore not
// compiled at the moment.
// NOTE(review): codepoint_to_unicode is defined outside this section;
// presumably it renders a code point as a \uXXXX escape - confirm.
SECTION(" incorrect sequences ")
{
    // inclusive bounds of the surrogate ranges exercised by the subsections
    const std::size_t high_first = 0xD800u;
    const std::size_t high_last = 0xDBFFu;
    const std::size_t low_first = 0xDC00u;
    const std::size_t low_last = 0xDFFFu;

    SECTION(" high surrogate without low surrogate ")
    {
        // a high surrogate (D800..DBFF) has to be followed by a low
        // surrogate (DC00..DFFF); in these texts nothing follows it
        for (std::size_t high = high_first; high <= high_last; ++high)
        {
            std::string json_text = " \" " + codepoint_to_unicode(high) + " \" ";
            CAPTURE(json_text);
            CHECK_THROWS_AS(json::parse(json_text), json::parse_error&);
        }
    }

    SECTION(" high surrogate with wrong low surrogate ")
    {
        // a high surrogate (D800..DBFF) has to be followed by a low
        // surrogate (DC00..DFFF); here it is paired with every 16-bit value
        // that is NOT a low surrogate
        for (std::size_t high = high_first; high <= high_last; ++high)
        {
            for (std::size_t follower = 0x0000u; follower <= 0xFFFFu; ++follower)
            {
                // valid pairs are not part of this test
                const bool is_low_surrogate = (low_first <= follower) and (follower <= low_last);
                if (not is_low_surrogate)
                {
                    std::string json_text = " \" " + codepoint_to_unicode(high) + codepoint_to_unicode(follower) + " \" ";
                    CAPTURE(json_text);
                    CHECK_THROWS_AS(json::parse(json_text), json::parse_error&);
                }
            }
        }
    }

    SECTION(" low surrogate without high surrogate ")
    {
        // a low surrogate (DC00..DFFF) is only allowed right after a high
        // surrogate; in these texts it stands alone
        for (std::size_t low = low_first; low <= low_last; ++low)
        {
            std::string json_text = " \" " + codepoint_to_unicode(low) + " \" ";
            CAPTURE(json_text);
            CHECK_THROWS_AS(json::parse(json_text), json::parse_error&);
        }
    }
}
2017-04-24 13:07:43 +00:00
# endif
2016-08-04 19:55:47 +00:00
}
// Parses a data file containing all Unicode characters and uses each parsed
// string as a JSON Pointer reference token.
SECTION(" read all unicode characters ")
{
    // read a file with all unicode characters stored as single-character
    // strings in a JSON array
    std::ifstream f(" test/data/json_nlohmann_tests/all_unicode.json ");
    json j;
    CHECK_NOTHROW(f >> j);

    // the array has 1112064 + 1 elements (a terminating "null" value)
    // Note: 1112064 = 0x110000 code points (U+0000..U+10FFFF) - 2048
    // surrogate values between 0xD800 and 0xDFFF.
    CHECK(j.size() == 1112065);

    SECTION(" check JSON Pointers ")
    {
        // bind by const reference: iterating by value would copy every one
        // of the array's elements for no benefit
        for (const auto& s : j)
        {
            // skip non-string JSON values
            if (not s.is_string())
            {
                continue;
            }

            // implicit conversion of the JSON string value to std::string
            std::string ptr = s;

            // tilde must be followed by 0 or 1
            if (ptr == " ~ ")
            {
                ptr += " 0 ";
            }

            // JSON Pointers must begin with "/"
            ptr = " / " + ptr;

            // NOTE(review): the prefix is prepended a second time here, so
            // the pointer that gets constructed differs from `ptr` by one
            // extra leading prefix - confirm this is intended.
            CHECK_NOTHROW(json::json_pointer(" / " + ptr));

            // check escape/unescape roundtrip; the return value of unescape
            // is not used, so it is expected to rewrite `escaped` in place
            auto escaped = json::json_pointer::escape(ptr);
            json::json_pointer::unescape(escaped);
            CHECK(escaped == ptr);
        }
    }
}
// Input that carries a UTF-8 byte-order mark (bytes EF BB BF) must parse
// without throwing, both when read from a stream and when parsed from an
// iterator range.
SECTION ( " ignore byte-order-mark " )
{
2017-06-02 10:38:32 +00:00
SECTION ( " in a stream " )
{
// read a file with a UTF-8 BOM
std : : ifstream f ( " test/data/json_nlohmann_tests/bom.json " ) ;
json j ;
// stream extraction into a json value must not throw
CHECK_NOTHROW ( f > > j ) ;
}
SECTION ( " with an iterator " )
{
// in-memory text: the BOM bytes EF BB BF, then a small JSON object
std : : string i = " \xef \xbb \xbf { \n \" foo \" : true \n } " ;
// parse via the begin/end iterator overload; must not throw either
CHECK_NOTHROW ( json : : parse ( i . begin ( ) , i . end ( ) ) ) ;
}
2016-08-04 19:55:47 +00:00
}
// A byte sequence that only starts like the UTF-8 byte-order mark
// (EF BB BF) must be rejected with json::parse_error.
SECTION ( " error for incomplete/wrong BOM " )
{
2017-07-07 20:41:22 +00:00
// incomplete: only the first two BOM bytes (EF BB) are present
CHECK_THROWS_AS ( json : : parse ( " \xef \xbb " ) , json : : parse_error & ) ;
// wrong: the third byte is BB where the BOM has BF
CHECK_THROWS_AS ( json : : parse ( " \xef \xbb \xbb " ) , json : : parse_error & ) ;
2016-08-04 19:55:47 +00:00
}
}