fixed BOM handling #344

This commit is contained in:
Niels 2016-10-29 21:08:30 +02:00
parent b820bb3b1f
commit a8136c578a
3 changed files with 252 additions and 285 deletions

View file

@ -7620,6 +7620,14 @@ class basic_json
{
// fill buffer
fill_line_buffer();
// skip UTF-8 byte-order mark
if (m_line_buffer.size() >= 3 and m_line_buffer.substr(0, 3) == "\xEF\xBB\xBF")
{
m_line_buffer[0] = ' ';
m_line_buffer[1] = ' ';
m_line_buffer[2] = ' ';
}
}
// switch off unwanted functions (due to pointer members)
@ -7826,7 +7834,7 @@ class basic_json
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
if ((m_limit - m_cursor) < 6)
if ((m_limit - m_cursor) < 5)
{
fill_line_buffer();
}
@ -7835,7 +7843,7 @@ class basic_json
{
goto basic_json_parser_6;
}
if (yych <= '\\')
if (yych <= '[')
{
if (yych <= '-')
{
@ -7884,63 +7892,59 @@ class basic_json
{
goto basic_json_parser_17;
}
if (yych == '[')
if (yych <= 'Z')
{
goto basic_json_parser_19;
}
goto basic_json_parser_4;
}
goto basic_json_parser_19;
}
}
}
else
{
if (yych <= 't')
if (yych <= 'n')
{
if (yych <= 'f')
if (yych <= 'e')
{
if (yych <= ']')
if (yych == ']')
{
goto basic_json_parser_21;
}
if (yych <= 'e')
{
goto basic_json_parser_4;
}
else
{
if (yych <= 'f')
{
goto basic_json_parser_23;
}
else
{
if (yych == 'n')
{
goto basic_json_parser_24;
}
if (yych <= 's')
if (yych <= 'm')
{
goto basic_json_parser_4;
}
goto basic_json_parser_25;
goto basic_json_parser_24;
}
}
else
{
if (yych <= '|')
if (yych <= 'z')
{
if (yych == '{')
if (yych == 't')
{
goto basic_json_parser_25;
}
goto basic_json_parser_4;
}
else
{
if (yych <= '{')
{
goto basic_json_parser_26;
}
goto basic_json_parser_4;
}
else
{
if (yych <= '}')
if (yych == '}')
{
goto basic_json_parser_28;
}
if (yych == 0xC3)
{
goto basic_json_parser_30;
}
goto basic_json_parser_4;
}
}
@ -7981,7 +7985,7 @@ basic_json_parser_9:
}
if (yych <= 0x7F)
{
goto basic_json_parser_32;
goto basic_json_parser_31;
}
if (yych <= 0xC1)
{
@ -7989,7 +7993,7 @@ basic_json_parser_9:
}
if (yych <= 0xF4)
{
goto basic_json_parser_32;
goto basic_json_parser_31;
}
goto basic_json_parser_5;
basic_json_parser_10:
@ -8020,18 +8024,18 @@ basic_json_parser_13:
{
if (yych == '.')
{
goto basic_json_parser_44;
goto basic_json_parser_43;
}
}
else
{
if (yych <= 'E')
{
goto basic_json_parser_45;
goto basic_json_parser_44;
}
if (yych == 'e')
{
goto basic_json_parser_45;
goto basic_json_parser_44;
}
}
basic_json_parser_14:
@ -8055,7 +8059,7 @@ basic_json_parser_15:
{
if (yych == '.')
{
goto basic_json_parser_44;
goto basic_json_parser_43;
}
goto basic_json_parser_14;
}
@ -8063,11 +8067,11 @@ basic_json_parser_15:
{
if (yych <= 'E')
{
goto basic_json_parser_45;
goto basic_json_parser_44;
}
if (yych == 'e')
{
goto basic_json_parser_45;
goto basic_json_parser_44;
}
goto basic_json_parser_14;
}
@ -8094,7 +8098,7 @@ basic_json_parser_23:
yych = *(m_marker = ++m_cursor);
if (yych == 'a')
{
goto basic_json_parser_46;
goto basic_json_parser_45;
}
goto basic_json_parser_5;
basic_json_parser_24:
@ -8102,7 +8106,7 @@ basic_json_parser_24:
yych = *(m_marker = ++m_cursor);
if (yych == 'u')
{
goto basic_json_parser_47;
goto basic_json_parser_46;
}
goto basic_json_parser_5;
basic_json_parser_25:
@ -8110,7 +8114,7 @@ basic_json_parser_25:
yych = *(m_marker = ++m_cursor);
if (yych == 'r')
{
goto basic_json_parser_48;
goto basic_json_parser_47;
}
goto basic_json_parser_5;
basic_json_parser_26:
@ -8126,24 +8130,16 @@ basic_json_parser_28:
break;
}
basic_json_parser_30:
yyaccept = 0;
yych = *(m_marker = ++m_cursor);
if (yych == 0xAF)
{
goto basic_json_parser_49;
}
goto basic_json_parser_5;
basic_json_parser_31:
++m_cursor;
if (m_limit <= m_cursor)
{
fill_line_buffer();
}
yych = *m_cursor;
basic_json_parser_32:
basic_json_parser_31:
if (yybm[0 + yych] & 128)
{
goto basic_json_parser_31;
goto basic_json_parser_30;
}
if (yych <= 0xE0)
{
@ -8151,25 +8147,25 @@ basic_json_parser_32:
{
if (yych <= 0x1F)
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
if (yych <= '"')
{
goto basic_json_parser_34;
goto basic_json_parser_33;
}
goto basic_json_parser_36;
goto basic_json_parser_35;
}
else
{
if (yych <= 0xC1)
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
if (yych <= 0xDF)
{
goto basic_json_parser_37;
goto basic_json_parser_36;
}
goto basic_json_parser_38;
goto basic_json_parser_37;
}
}
else
@ -8178,27 +8174,27 @@ basic_json_parser_32:
{
if (yych == 0xED)
{
goto basic_json_parser_40;
}
goto basic_json_parser_39;
}
goto basic_json_parser_38;
}
else
{
if (yych <= 0xF0)
{
goto basic_json_parser_41;
goto basic_json_parser_40;
}
if (yych <= 0xF3)
{
goto basic_json_parser_42;
goto basic_json_parser_41;
}
if (yych <= 0xF4)
{
goto basic_json_parser_43;
goto basic_json_parser_42;
}
}
}
basic_json_parser_33:
basic_json_parser_32:
m_cursor = m_marker;
if (yyaccept == 0)
{
@ -8208,13 +8204,13 @@ basic_json_parser_33:
{
goto basic_json_parser_14;
}
basic_json_parser_34:
basic_json_parser_33:
++m_cursor;
{
last_token_type = token_type::value_string;
break;
}
basic_json_parser_36:
basic_json_parser_35:
++m_cursor;
if (m_limit <= m_cursor)
{
@ -8227,13 +8223,13 @@ basic_json_parser_36:
{
if (yych == '"')
{
goto basic_json_parser_31;
goto basic_json_parser_30;
}
if (yych <= '.')
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
goto basic_json_parser_31;
goto basic_json_parser_30;
}
else
{
@ -8241,17 +8237,17 @@ basic_json_parser_36:
{
if (yych <= '[')
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
goto basic_json_parser_31;
goto basic_json_parser_30;
}
else
{
if (yych == 'b')
{
goto basic_json_parser_31;
goto basic_json_parser_30;
}
goto basic_json_parser_33;
goto basic_json_parser_32;
}
}
}
@ -8261,13 +8257,13 @@ basic_json_parser_36:
{
if (yych <= 'f')
{
goto basic_json_parser_31;
goto basic_json_parser_30;
}
if (yych == 'n')
{
goto basic_json_parser_31;
goto basic_json_parser_30;
}
goto basic_json_parser_33;
goto basic_json_parser_32;
}
else
{
@ -8275,25 +8271,25 @@ basic_json_parser_36:
{
if (yych <= 'r')
{
goto basic_json_parser_31;
goto basic_json_parser_30;
}
goto basic_json_parser_33;
goto basic_json_parser_32;
}
else
{
if (yych <= 't')
{
goto basic_json_parser_31;
goto basic_json_parser_30;
}
if (yych <= 'u')
{
goto basic_json_parser_50;
goto basic_json_parser_48;
}
goto basic_json_parser_33;
goto basic_json_parser_32;
}
}
}
basic_json_parser_37:
basic_json_parser_36:
++m_cursor;
if (m_limit <= m_cursor)
{
@ -8302,14 +8298,14 @@ basic_json_parser_37:
yych = *m_cursor;
if (yych <= 0x7F)
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
if (yych <= 0xBF)
{
goto basic_json_parser_31;
goto basic_json_parser_30;
}
goto basic_json_parser_33;
basic_json_parser_38:
goto basic_json_parser_32;
basic_json_parser_37:
++m_cursor;
if (m_limit <= m_cursor)
{
@ -8318,13 +8314,29 @@ basic_json_parser_38:
yych = *m_cursor;
if (yych <= 0x9F)
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
if (yych <= 0xBF)
{
goto basic_json_parser_37;
goto basic_json_parser_36;
}
goto basic_json_parser_33;
goto basic_json_parser_32;
basic_json_parser_38:
++m_cursor;
if (m_limit <= m_cursor)
{
fill_line_buffer();
}
yych = *m_cursor;
if (yych <= 0x7F)
{
goto basic_json_parser_32;
}
if (yych <= 0xBF)
{
goto basic_json_parser_36;
}
goto basic_json_parser_32;
basic_json_parser_39:
++m_cursor;
if (m_limit <= m_cursor)
@ -8334,30 +8346,14 @@ basic_json_parser_39:
yych = *m_cursor;
if (yych <= 0x7F)
{
goto basic_json_parser_33;
}
if (yych <= 0xBF)
{
goto basic_json_parser_37;
}
goto basic_json_parser_33;
basic_json_parser_40:
++m_cursor;
if (m_limit <= m_cursor)
{
fill_line_buffer();
}
yych = *m_cursor;
if (yych <= 0x7F)
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
if (yych <= 0x9F)
{
goto basic_json_parser_37;
goto basic_json_parser_36;
}
goto basic_json_parser_33;
basic_json_parser_41:
goto basic_json_parser_32;
basic_json_parser_40:
++m_cursor;
if (m_limit <= m_cursor)
{
@ -8366,13 +8362,29 @@ basic_json_parser_41:
yych = *m_cursor;
if (yych <= 0x8F)
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
if (yych <= 0xBF)
{
goto basic_json_parser_39;
goto basic_json_parser_38;
}
goto basic_json_parser_33;
goto basic_json_parser_32;
basic_json_parser_41:
++m_cursor;
if (m_limit <= m_cursor)
{
fill_line_buffer();
}
yych = *m_cursor;
if (yych <= 0x7F)
{
goto basic_json_parser_32;
}
if (yych <= 0xBF)
{
goto basic_json_parser_38;
}
goto basic_json_parser_32;
basic_json_parser_42:
++m_cursor;
if (m_limit <= m_cursor)
@ -8382,95 +8394,72 @@ basic_json_parser_42:
yych = *m_cursor;
if (yych <= 0x7F)
{
goto basic_json_parser_33;
}
if (yych <= 0xBF)
{
goto basic_json_parser_39;
}
goto basic_json_parser_33;
basic_json_parser_43:
++m_cursor;
if (m_limit <= m_cursor)
{
fill_line_buffer();
}
yych = *m_cursor;
if (yych <= 0x7F)
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
if (yych <= 0x8F)
{
goto basic_json_parser_39;
goto basic_json_parser_38;
}
goto basic_json_parser_33;
basic_json_parser_44:
goto basic_json_parser_32;
basic_json_parser_43:
yych = *++m_cursor;
if (yych <= '/')
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
if (yych <= '9')
{
goto basic_json_parser_51;
goto basic_json_parser_49;
}
goto basic_json_parser_33;
basic_json_parser_45:
goto basic_json_parser_32;
basic_json_parser_44:
yych = *++m_cursor;
if (yych <= ',')
{
if (yych == '+')
{
goto basic_json_parser_53;
goto basic_json_parser_51;
}
goto basic_json_parser_33;
goto basic_json_parser_32;
}
else
{
if (yych <= '-')
{
goto basic_json_parser_53;
goto basic_json_parser_51;
}
if (yych <= '/')
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
if (yych <= '9')
{
goto basic_json_parser_52;
}
goto basic_json_parser_32;
}
basic_json_parser_45:
yych = *++m_cursor;
if (yych == 'l')
{
goto basic_json_parser_54;
}
goto basic_json_parser_33;
}
goto basic_json_parser_32;
basic_json_parser_46:
yych = *++m_cursor;
if (yych == 'l')
{
goto basic_json_parser_56;
goto basic_json_parser_55;
}
goto basic_json_parser_33;
goto basic_json_parser_32;
basic_json_parser_47:
yych = *++m_cursor;
if (yych == 'l')
{
goto basic_json_parser_57;
}
goto basic_json_parser_33;
basic_json_parser_48:
yych = *++m_cursor;
if (yych == 'u')
{
goto basic_json_parser_58;
goto basic_json_parser_56;
}
goto basic_json_parser_33;
basic_json_parser_49:
yych = *++m_cursor;
if (yych == 0xC2)
{
goto basic_json_parser_59;
}
goto basic_json_parser_33;
basic_json_parser_50:
goto basic_json_parser_32;
basic_json_parser_48:
++m_cursor;
if (m_limit <= m_cursor)
{
@ -8481,31 +8470,31 @@ basic_json_parser_50:
{
if (yych <= '/')
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
if (yych <= '9')
{
goto basic_json_parser_60;
goto basic_json_parser_57;
}
goto basic_json_parser_33;
goto basic_json_parser_32;
}
else
{
if (yych <= 'F')
{
goto basic_json_parser_60;
goto basic_json_parser_57;
}
if (yych <= '`')
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
if (yych <= 'f')
{
goto basic_json_parser_60;
goto basic_json_parser_57;
}
goto basic_json_parser_33;
goto basic_json_parser_32;
}
basic_json_parser_51:
basic_json_parser_49:
yyaccept = 1;
m_marker = ++m_cursor;
if ((m_limit - m_cursor) < 3)
@ -8521,7 +8510,7 @@ basic_json_parser_51:
}
if (yych <= '9')
{
goto basic_json_parser_51;
goto basic_json_parser_49;
}
goto basic_json_parser_14;
}
@ -8529,25 +8518,25 @@ basic_json_parser_51:
{
if (yych <= 'E')
{
goto basic_json_parser_45;
goto basic_json_parser_44;
}
if (yych == 'e')
{
goto basic_json_parser_45;
goto basic_json_parser_44;
}
goto basic_json_parser_14;
}
basic_json_parser_53:
basic_json_parser_51:
yych = *++m_cursor;
if (yych <= '/')
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
if (yych >= ':')
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
basic_json_parser_54:
basic_json_parser_52:
++m_cursor;
if (m_limit <= m_cursor)
{
@ -8560,99 +8549,85 @@ basic_json_parser_54:
}
if (yych <= '9')
{
goto basic_json_parser_54;
goto basic_json_parser_52;
}
goto basic_json_parser_14;
basic_json_parser_56:
basic_json_parser_54:
yych = *++m_cursor;
if (yych == 's')
{
goto basic_json_parser_61;
goto basic_json_parser_58;
}
goto basic_json_parser_33;
basic_json_parser_57:
goto basic_json_parser_32;
basic_json_parser_55:
yych = *++m_cursor;
if (yych == 'l')
{
goto basic_json_parser_62;
goto basic_json_parser_59;
}
goto basic_json_parser_32;
basic_json_parser_56:
yych = *++m_cursor;
if (yych == 'e')
{
goto basic_json_parser_61;
}
goto basic_json_parser_32;
basic_json_parser_57:
++m_cursor;
if (m_limit <= m_cursor)
{
fill_line_buffer();
}
yych = *m_cursor;
if (yych <= '@')
{
if (yych <= '/')
{
goto basic_json_parser_32;
}
if (yych <= '9')
{
goto basic_json_parser_63;
}
goto basic_json_parser_32;
}
else
{
if (yych <= 'F')
{
goto basic_json_parser_63;
}
if (yych <= '`')
{
goto basic_json_parser_32;
}
if (yych <= 'f')
{
goto basic_json_parser_63;
}
goto basic_json_parser_32;
}
goto basic_json_parser_33;
basic_json_parser_58:
yych = *++m_cursor;
if (yych == 'e')
{
goto basic_json_parser_64;
}
goto basic_json_parser_33;
goto basic_json_parser_32;
basic_json_parser_59:
yych = *++m_cursor;
if (yych == 0xBB)
{
goto basic_json_parser_66;
}
goto basic_json_parser_33;
basic_json_parser_60:
++m_cursor;
if (m_limit <= m_cursor)
{
fill_line_buffer();
}
yych = *m_cursor;
if (yych <= '@')
{
if (yych <= '/')
{
goto basic_json_parser_33;
}
if (yych <= '9')
{
goto basic_json_parser_67;
}
goto basic_json_parser_33;
}
else
{
if (yych <= 'F')
{
goto basic_json_parser_67;
}
if (yych <= '`')
{
goto basic_json_parser_33;
}
if (yych <= 'f')
{
goto basic_json_parser_67;
}
goto basic_json_parser_33;
}
basic_json_parser_61:
yych = *++m_cursor;
if (yych == 'e')
{
goto basic_json_parser_68;
}
goto basic_json_parser_33;
basic_json_parser_62:
++m_cursor;
{
last_token_type = token_type::literal_null;
break;
}
basic_json_parser_64:
basic_json_parser_61:
++m_cursor;
{
last_token_type = token_type::literal_true;
break;
}
basic_json_parser_66:
yych = *++m_cursor;
if (yych == 0xC2)
{
goto basic_json_parser_70;
}
goto basic_json_parser_33;
basic_json_parser_67:
basic_json_parser_63:
++m_cursor;
if (m_limit <= m_cursor)
{
@ -8663,44 +8638,37 @@ basic_json_parser_67:
{
if (yych <= '/')
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
if (yych <= '9')
{
goto basic_json_parser_71;
goto basic_json_parser_66;
}
goto basic_json_parser_33;
goto basic_json_parser_32;
}
else
{
if (yych <= 'F')
{
goto basic_json_parser_71;
goto basic_json_parser_66;
}
if (yych <= '`')
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
if (yych <= 'f')
{
goto basic_json_parser_71;
goto basic_json_parser_66;
}
goto basic_json_parser_33;
goto basic_json_parser_32;
}
basic_json_parser_68:
basic_json_parser_64:
++m_cursor;
{
last_token_type = token_type::literal_false;
break;
}
basic_json_parser_70:
yych = *++m_cursor;
if (yych == 0xBF)
{
goto basic_json_parser_72;
}
goto basic_json_parser_33;
basic_json_parser_71:
basic_json_parser_66:
++m_cursor;
if (m_limit <= m_cursor)
{
@ -8711,34 +8679,29 @@ basic_json_parser_71:
{
if (yych <= '/')
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
if (yych <= '9')
{
goto basic_json_parser_31;
goto basic_json_parser_30;
}
goto basic_json_parser_33;
goto basic_json_parser_32;
}
else
{
if (yych <= 'F')
{
goto basic_json_parser_31;
goto basic_json_parser_30;
}
if (yych <= '`')
{
goto basic_json_parser_33;
goto basic_json_parser_32;
}
if (yych <= 'f')
{
goto basic_json_parser_31;
goto basic_json_parser_30;
}
goto basic_json_parser_33;
}
basic_json_parser_72:
++m_cursor;
{
continue;
goto basic_json_parser_32;
}
}

View file

@ -7620,6 +7620,14 @@ class basic_json
{
// fill buffer
fill_line_buffer();
// skip UTF-8 byte-order mark
if (m_line_buffer.size() >= 3 and m_line_buffer.substr(0, 3) == "\xEF\xBB\xBF")
{
m_line_buffer[0] = ' ';
m_line_buffer[1] = ' ';
m_line_buffer[2] = ' ';
}
}
// switch off unwanted functions (due to pointer members)
@ -7802,10 +7810,6 @@ class basic_json
ws = [ \t\n\r]+;
ws { continue; }
// ignore byte-order-mark
bom = "\xEF\xBB\xBF";
bom { continue; }
// structural characters
"[" { last_token_type = token_type::begin_array; break; }
"]" { last_token_type = token_type::end_array; break; }

View file

@ -159,7 +159,7 @@ TEST_CASE("Unicode", "[hide]")
}
}
}
/*
SECTION("ignore byte-order-mark")
{
// read a file with a UTF-8 BOM
@ -167,7 +167,7 @@ TEST_CASE("Unicode", "[hide]")
json j;
CHECK_NOTHROW(j << f);
}
*/
SECTION("error for incomplete/wrong BOM")
{
CHECK_THROWS_AS(json::parse("\xef\xbb"), std::invalid_argument);