small tidy up after pull request #22
- removed IDE leftovers - adjusted const’ness of some functions - style guide
This commit is contained in:
parent
6533b33387
commit
bd05c7a18a
9 changed files with 247 additions and 107 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -46,3 +46,5 @@ CMakeFiles
|
||||||
libjson.a
|
libjson.a
|
||||||
|
|
||||||
Testing
|
Testing
|
||||||
|
|
||||||
|
.idea
|
|
@ -1,35 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<project version="4">
|
|
||||||
<component name="ProjectCodeStyleSettingsManager">
|
|
||||||
<option name="PER_PROJECT_SETTINGS">
|
|
||||||
<value>
|
|
||||||
<Objective-C-extensions>
|
|
||||||
<option name="GENERATE_INSTANCE_VARIABLES_FOR_PROPERTIES" value="ASK" />
|
|
||||||
<option name="RELEASE_STYLE" value="IVAR" />
|
|
||||||
<file>
|
|
||||||
<option name="com.jetbrains.objc.util.OCDeclarationKind" value="Import" />
|
|
||||||
<option name="com.jetbrains.objc.util.OCDeclarationKind" value="Macro" />
|
|
||||||
<option name="com.jetbrains.objc.util.OCDeclarationKind" value="Typedef" />
|
|
||||||
<option name="com.jetbrains.objc.util.OCDeclarationKind" value="Struct" />
|
|
||||||
<option name="com.jetbrains.objc.util.OCDeclarationKind" value="Enum" />
|
|
||||||
<option name="com.jetbrains.objc.util.OCDeclarationKind" value="ClassPredef" />
|
|
||||||
<option name="com.jetbrains.objc.util.OCDeclarationKind" value="Constant" />
|
|
||||||
<option name="com.jetbrains.objc.util.OCDeclarationKind" value="Global" />
|
|
||||||
<option name="com.jetbrains.objc.util.OCDeclarationKind" value="FunctionPredecl" />
|
|
||||||
<option name="com.jetbrains.objc.util.OCDeclarationKind" value="Function" />
|
|
||||||
<option name="com.jetbrains.objc.util.OCDeclarationKind" value="Class" />
|
|
||||||
</file>
|
|
||||||
<class>
|
|
||||||
<option name="com.jetbrains.objc.util.OCDeclarationKind" value="Property" />
|
|
||||||
<option name="com.jetbrains.objc.util.OCDeclarationKind" value="Synthesize" />
|
|
||||||
<option name="com.jetbrains.objc.util.OCDeclarationKind" value="InitMethod" />
|
|
||||||
<option name="com.jetbrains.objc.util.OCDeclarationKind" value="StaticMethod" />
|
|
||||||
<option name="com.jetbrains.objc.util.OCDeclarationKind" value="InstanceMethod" />
|
|
||||||
<option name="com.jetbrains.objc.util.OCDeclarationKind" value="DeallocMethod" />
|
|
||||||
</class>
|
|
||||||
</Objective-C-extensions>
|
|
||||||
</value>
|
|
||||||
</option>
|
|
||||||
<option name="PREFERRED_PROJECT_CODE_STYLE" value="Default (1)" />
|
|
||||||
</component>
|
|
||||||
</project>
|
|
|
@ -1,8 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<module type="CPP_MODULE" version="4">
|
|
||||||
<component name="NewModuleRootManager">
|
|
||||||
<content url="file://$MODULE_DIR$" />
|
|
||||||
<orderEntry type="inheritedJdk" />
|
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
|
||||||
</component>
|
|
||||||
</module>
|
|
|
@ -1,5 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<project version="4">
|
|
||||||
<component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$" />
|
|
||||||
<component name="ProjectRootManager" version="2" />
|
|
||||||
</project>
|
|
|
@ -1,8 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<project version="4">
|
|
||||||
<component name="ProjectModuleManager">
|
|
||||||
<modules>
|
|
||||||
<module fileurl="file://$PROJECT_DIR$/.idea/json.iml" filepath="$PROJECT_DIR$/.idea/json.iml" />
|
|
||||||
</modules>
|
|
||||||
</component>
|
|
||||||
</project>
|
|
|
@ -1,6 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<project version="4">
|
|
||||||
<component name="VcsDirectoryMappings">
|
|
||||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
|
||||||
</component>
|
|
||||||
</project>
|
|
|
@ -418,9 +418,15 @@ class json
|
||||||
/// read the next character, stripping whitespace
|
/// read the next character, stripping whitespace
|
||||||
bool next();
|
bool next();
|
||||||
/// raise an exception with an error message
|
/// raise an exception with an error message
|
||||||
inline void error(const std::string&) __attribute__((noreturn));
|
inline void error(const std::string&) const __attribute__((noreturn));
|
||||||
/// parse a quoted string
|
/// parse a quoted string
|
||||||
inline std::string parseString();
|
inline std::string parseString();
|
||||||
|
/// transforms a unicode code point to its UTF-8 representation
|
||||||
|
std::string codePointToUTF8(unsigned int codePoint) const;
|
||||||
|
/// parses 4 hex characters that represent a unicode code point
|
||||||
|
inline unsigned int parse4HexCodePoint();
|
||||||
|
/// parses \uXXXX[\uXXXX] unicode escape characters
|
||||||
|
inline std::string parseUnicodeEscape();
|
||||||
/// parse a Boolean "true"
|
/// parse a Boolean "true"
|
||||||
inline void parseTrue();
|
inline void parseTrue();
|
||||||
/// parse a Boolean "false"
|
/// parse a Boolean "false"
|
||||||
|
@ -2434,9 +2440,9 @@ json json::parser::parse()
|
||||||
// remember position of number's first character
|
// remember position of number's first character
|
||||||
const auto _firstpos_ = pos_ - 1;
|
const auto _firstpos_ = pos_ - 1;
|
||||||
|
|
||||||
while (next() and (std::isdigit(current_) || current_ == '.'
|
while (next() and (std::isdigit(current_) or current_ == '.'
|
||||||
|| current_ == 'e' || current_ == 'E'
|
or current_ == 'e' or current_ == 'E'
|
||||||
|| current_ == '+' || current_ == '-'));
|
or current_ == '+' or current_ == '-'));
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
@ -2512,7 +2518,7 @@ the error message \p msg), and the last read token.
|
||||||
|
|
||||||
@exception std::invalid_argument whenever the function is called
|
@exception std::invalid_argument whenever the function is called
|
||||||
*/
|
*/
|
||||||
void json::parser::error(const std::string& msg)
|
void json::parser::error(const std::string& msg) const
|
||||||
{
|
{
|
||||||
throw std::invalid_argument("parse error at position " +
|
throw std::invalid_argument("parse error at position " +
|
||||||
std::to_string(pos_) + ": " + msg +
|
std::to_string(pos_) + ": " + msg +
|
||||||
|
@ -2542,14 +2548,19 @@ std::string json::parser::parseString()
|
||||||
// the result of the parse process
|
// the result of the parse process
|
||||||
std::string result;
|
std::string result;
|
||||||
|
|
||||||
// iterate with pos_ over the whole string
|
// iterate with pos_ over the whole input until we found the end and return
|
||||||
|
// or we exit via error()
|
||||||
for (; pos_ < buffer_.size(); pos_++)
|
for (; pos_ < buffer_.size(); pos_++)
|
||||||
{
|
{
|
||||||
char currentChar = buffer_[pos_];
|
char currentChar = buffer_[pos_];
|
||||||
|
|
||||||
// uneven amount of backslashes means the user wants to escape something
|
if (not evenAmountOfBackslashes)
|
||||||
if (!evenAmountOfBackslashes)
|
|
||||||
{
|
{
|
||||||
|
// uneven amount of backslashes means the user wants to escape
|
||||||
|
// something so we know there is a case such as '\X' or '\\\X' but
|
||||||
|
// we don't know yet what X is.
|
||||||
|
// at this point in the code, the currentChar has the value of X.
|
||||||
|
|
||||||
// slash, backslash and quote are copied as is
|
// slash, backslash and quote are copied as is
|
||||||
if (currentChar == '/' or currentChar == '\\' or currentChar == '"')
|
if (currentChar == '/' or currentChar == '\\' or currentChar == '"')
|
||||||
{
|
{
|
||||||
|
@ -2557,8 +2568,8 @@ std::string json::parser::parseString()
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// all other characters are replaced by their respective
|
// all other characters are replaced by their respective special
|
||||||
// special character
|
// character
|
||||||
switch (currentChar)
|
switch (currentChar)
|
||||||
{
|
{
|
||||||
case 't':
|
case 't':
|
||||||
|
@ -2586,12 +2597,26 @@ std::string json::parser::parseString()
|
||||||
result += '\r';
|
result += '\r';
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case 'u':
|
||||||
|
{
|
||||||
|
// \uXXXX[\uXXXX] is used for escaping unicode, which
|
||||||
|
// has its own subroutine.
|
||||||
|
result += parseUnicodeEscape();
|
||||||
|
// the parsing process has brought us one step behind
|
||||||
|
// the unicode escape sequence:
|
||||||
|
// \uXXXX
|
||||||
|
// ^
|
||||||
|
// we need to go one character back or the parser would
|
||||||
|
// skip the character we are currently pointing at as
|
||||||
|
// the for-loop will increment pos_ after this iteration
|
||||||
|
pos_--;
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
error("expected one of \\, /, b, f, n, r, t behind backslash.");
|
error("expected one of \\, /, b, f, n, r, t, u behind backslash.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// TODO implement \uXXXX
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -2610,7 +2635,7 @@ std::string json::parser::parseString()
|
||||||
}
|
}
|
||||||
else if (currentChar != '\\')
|
else if (currentChar != '\\')
|
||||||
{
|
{
|
||||||
// All non-backslash characters are added to the end of the
|
// all non-backslash characters are added to the end of the
|
||||||
// result string. The only backslashes we want in the result
|
// result string. The only backslashes we want in the result
|
||||||
// are the ones that are escaped (which happens above).
|
// are the ones that are escaped (which happens above).
|
||||||
result += currentChar;
|
result += currentChar;
|
||||||
|
@ -2638,6 +2663,187 @@ std::string json::parser::parseString()
|
||||||
error("expected '\"'");
|
error("expected '\"'");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*!
|
||||||
|
Turns a code point into it's UTF-8 representation.
|
||||||
|
You should only pass numbers < 0x10ffff into this function
|
||||||
|
(everything else is a invalid code point).
|
||||||
|
|
||||||
|
@return the UTF-8 representation of the given code point
|
||||||
|
*/
|
||||||
|
std::string json::parser::codePointToUTF8(unsigned int codePoint) const
|
||||||
|
{
|
||||||
|
// this method contains a lot of bit manipulations to
|
||||||
|
// build the bytes for UTF-8.
|
||||||
|
|
||||||
|
// the '(... >> S) & 0xHH'-patterns are used to retrieve
|
||||||
|
// certain bits from the code points.
|
||||||
|
|
||||||
|
// all static casts in this method have boundary checks
|
||||||
|
|
||||||
|
// we initialize all strings with their final length
|
||||||
|
// (e.g. 1 to 4 bytes) to save the reallocations.
|
||||||
|
|
||||||
|
if (codePoint <= 0x7f)
|
||||||
|
{
|
||||||
|
// it's just a ASCII compatible codePoint,
|
||||||
|
// so we just interpret the point as a character
|
||||||
|
// and return ASCII
|
||||||
|
|
||||||
|
return std::string(1, static_cast<char>(codePoint));
|
||||||
|
}
|
||||||
|
// if true, we need two bytes to encode this as UTF-8
|
||||||
|
else if (codePoint <= 0x7ff)
|
||||||
|
{
|
||||||
|
// the 0xC0 enables the two most significant two bits
|
||||||
|
// to make this a two-byte UTF-8 character.
|
||||||
|
std::string result(2, static_cast<char>(0xC0 | ((codePoint >> 6) & 0x1F)));
|
||||||
|
result[1] = static_cast<char>(0x80 | (codePoint & 0x3F));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
// if true, now we need three bytes to encode this as UTF-8
|
||||||
|
else if (codePoint <= 0xffff)
|
||||||
|
{
|
||||||
|
// the 0xE0 enables the three most significant two bits
|
||||||
|
// to make this a three-byte UTF-8 character.
|
||||||
|
std::string result(3, static_cast<char>(0xE0 | ((codePoint >> 12) & 0x0F)));
|
||||||
|
result[1] = static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F));
|
||||||
|
result[2] = static_cast<char>(0x80 | (codePoint & 0x3F));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
// if true, we need maximal four bytes to encode this as UTF-8
|
||||||
|
else if (codePoint <= 0x10ffff)
|
||||||
|
{
|
||||||
|
// the 0xE0 enables the four most significant two bits
|
||||||
|
// to make this a three-byte UTF-8 character.
|
||||||
|
std::string result(4, static_cast<char>(0xF0 | ((codePoint >> 18) & 0x07)));
|
||||||
|
result[1] = static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F));
|
||||||
|
result[2] = static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F));
|
||||||
|
result[3] = static_cast<char>(0x80 | (codePoint & 0x3F));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Can't be tested without direct access to this private method.
|
||||||
|
std::string errorMessage = "Invalid codePoint: ";
|
||||||
|
errorMessage += codePoint;
|
||||||
|
error(errorMessage);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
Parses 4 hexadecimal characters as a number.
|
||||||
|
|
||||||
|
@return the value of the number the hexadecimal characters represent.
|
||||||
|
|
||||||
|
@pre pos_ is pointing to the first of the 4 hexadecimal characters.
|
||||||
|
|
||||||
|
@post pos_ is pointing to the character after the 4 hexadecimal characters.
|
||||||
|
*/
|
||||||
|
unsigned int json::parser::parse4HexCodePoint()
|
||||||
|
{
|
||||||
|
const auto startPos = pos_;
|
||||||
|
|
||||||
|
// check if the remaining buffer is long enough to even hold 4 characters
|
||||||
|
if (pos_ + 3 >= buffer_.size())
|
||||||
|
{
|
||||||
|
error("Got end of input while parsing unicode escape sequence \\uXXXX");
|
||||||
|
}
|
||||||
|
|
||||||
|
// make a string that can hold the pair
|
||||||
|
std::string hexCode(4, ' ');
|
||||||
|
|
||||||
|
for (; pos_ < startPos + 4; pos_++)
|
||||||
|
{
|
||||||
|
// no boundary check here as we already checked above
|
||||||
|
char currentChar = buffer_[pos_];
|
||||||
|
|
||||||
|
// check if we have a hexadecimal character
|
||||||
|
if ((currentChar >= '0' and currentChar <= '9')
|
||||||
|
or (currentChar >= 'a' and currentChar <= 'f')
|
||||||
|
or (currentChar >= 'A' and currentChar <= 'F'))
|
||||||
|
{
|
||||||
|
// all is well, we have valid hexadecimal chars
|
||||||
|
// so we copy that char into our string
|
||||||
|
hexCode[pos_ - startPos] = currentChar;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error("Found non-hexadecimal character in unicode escape sequence!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// the cast is safe as 4 hex characters can't present more than 16 bits
|
||||||
|
// the input to stoul was checked to contain only hexadecimal characters
|
||||||
|
// (see above)
|
||||||
|
return static_cast<unsigned int>(std::stoul(hexCode, nullptr, 16));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
Parses the unicode escape codes as defined in the ECMA-404.
|
||||||
|
The escape sequence has two forms:
|
||||||
|
1. \uXXXX
|
||||||
|
2. \uXXXX\uYYYY
|
||||||
|
where X and Y are a hexadecimal character (a-zA-Z0-9).
|
||||||
|
|
||||||
|
Form 1 just contains the unicode code point in the hexadecimal number XXXX.
|
||||||
|
Form 2 is encoding a UTF-16 surrogate pair. The high surrogate is XXXX, the low
|
||||||
|
surrogate is YYYY.
|
||||||
|
|
||||||
|
@return the UTF-8 character this unicode escape sequence escaped.
|
||||||
|
|
||||||
|
@pre pos_ is pointing at at the 'u' behind the first backslash.
|
||||||
|
|
||||||
|
@post pos_ is pointing at the character behind the last X (or Y in form 2).
|
||||||
|
*/
|
||||||
|
std::string json::parser::parseUnicodeEscape()
|
||||||
|
{
|
||||||
|
// jump to the first hex value
|
||||||
|
pos_++;
|
||||||
|
// parse the hex first hex values
|
||||||
|
unsigned int firstCodePoint = parse4HexCodePoint();
|
||||||
|
|
||||||
|
if (firstCodePoint >= 0xD800 and firstCodePoint <= 0xDBFF)
|
||||||
|
{
|
||||||
|
// we found invalid code points, which means we either have a malformed
|
||||||
|
// input or we found a high surrogate.
|
||||||
|
// we can only find out by seeing if the next character also wants to
|
||||||
|
// encode a unicode character (so, we have the \uXXXX\uXXXX case here).
|
||||||
|
|
||||||
|
// jump behind the next \u
|
||||||
|
pos_ += 2;
|
||||||
|
// try to parse the next hex values.
|
||||||
|
// the method does boundary checking for us, so no need to do that here
|
||||||
|
unsigned secondCodePoint = parse4HexCodePoint();
|
||||||
|
// ok, we have a low surrogate, check if it is a valid one
|
||||||
|
if (secondCodePoint >= 0xDC00 and secondCodePoint <= 0xDFFF)
|
||||||
|
{
|
||||||
|
// calculate the code point from the pair according to the spec
|
||||||
|
unsigned int finalCodePoint =
|
||||||
|
// high surrogate occupies the most significant 22 bits
|
||||||
|
(firstCodePoint << 10)
|
||||||
|
// low surrogate occupies the least significant 15 bits
|
||||||
|
+ secondCodePoint
|
||||||
|
// there is still the 0xD800, 0xDC00 and 0x10000 noise in
|
||||||
|
// the result
|
||||||
|
// so we have to substract with:
|
||||||
|
// (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00
|
||||||
|
- 0x35FDC00;
|
||||||
|
|
||||||
|
// we transform the calculated point into UTF-8
|
||||||
|
return codePointToUTF8(finalCodePoint);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error("missing low surrogate");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
// We have Form 1, so we just interpret the XXXX as a code point
|
||||||
|
return codePointToUTF8(firstCodePoint);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
This function is called in case a \p "t" is read in the main parse function
|
This function is called in case a \p "t" is read in the main parse function
|
||||||
@ref parse. In the standard, the \p "true" token is the only candidate, so the
|
@ref parse. In the standard, the \p "true" token is the only candidate, so the
|
||||||
|
|
54
src/json.cc
54
src/json.cc
|
@ -1988,9 +1988,9 @@ json json::parser::parse()
|
||||||
// remember position of number's first character
|
// remember position of number's first character
|
||||||
const auto _firstpos_ = pos_ - 1;
|
const auto _firstpos_ = pos_ - 1;
|
||||||
|
|
||||||
while (next() and (std::isdigit(current_) || current_ == '.'
|
while (next() and (std::isdigit(current_) or current_ == '.'
|
||||||
|| current_ == 'e' || current_ == 'E'
|
or current_ == 'e' or current_ == 'E'
|
||||||
|| current_ == '+' || current_ == '-'));
|
or current_ == '+' or current_ == '-'));
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
@ -2066,7 +2066,7 @@ the error message \p msg), and the last read token.
|
||||||
|
|
||||||
@exception std::invalid_argument whenever the function is called
|
@exception std::invalid_argument whenever the function is called
|
||||||
*/
|
*/
|
||||||
void json::parser::error(const std::string& msg)
|
void json::parser::error(const std::string& msg) const
|
||||||
{
|
{
|
||||||
throw std::invalid_argument("parse error at position " +
|
throw std::invalid_argument("parse error at position " +
|
||||||
std::to_string(pos_) + ": " + msg +
|
std::to_string(pos_) + ": " + msg +
|
||||||
|
@ -2102,7 +2102,7 @@ std::string json::parser::parseString()
|
||||||
{
|
{
|
||||||
char currentChar = buffer_[pos_];
|
char currentChar = buffer_[pos_];
|
||||||
|
|
||||||
if (!evenAmountOfBackslashes)
|
if (not evenAmountOfBackslashes)
|
||||||
{
|
{
|
||||||
// uneven amount of backslashes means the user wants to escape
|
// uneven amount of backslashes means the user wants to escape
|
||||||
// something so we know there is a case such as '\X' or '\\\X' but
|
// something so we know there is a case such as '\X' or '\\\X' but
|
||||||
|
@ -2110,9 +2110,7 @@ std::string json::parser::parseString()
|
||||||
// at this point in the code, the currentChar has the value of X.
|
// at this point in the code, the currentChar has the value of X.
|
||||||
|
|
||||||
// slash, backslash and quote are copied as is
|
// slash, backslash and quote are copied as is
|
||||||
if ( currentChar == '/'
|
if (currentChar == '/' or currentChar == '\\' or currentChar == '"')
|
||||||
|| currentChar == '\\'
|
|
||||||
|| currentChar == '"')
|
|
||||||
{
|
{
|
||||||
result += currentChar;
|
result += currentChar;
|
||||||
}
|
}
|
||||||
|
@ -2221,12 +2219,8 @@ You should only pass numbers < 0x10ffff into this function
|
||||||
(everything else is a invalid code point).
|
(everything else is a invalid code point).
|
||||||
|
|
||||||
@return the UTF-8 representation of the given code point
|
@return the UTF-8 representation of the given code point
|
||||||
|
|
||||||
@pre This method isn't accessing the members of the parser
|
|
||||||
|
|
||||||
@post This method isn't accessing the members of the parser
|
|
||||||
*/
|
*/
|
||||||
std::string json::parser::codePointToUTF8(unsigned int codePoint)
|
std::string json::parser::codePointToUTF8(unsigned int codePoint) const
|
||||||
{
|
{
|
||||||
// this method contains a lot of bit manipulations to
|
// this method contains a lot of bit manipulations to
|
||||||
// build the bytes for UTF-8.
|
// build the bytes for UTF-8.
|
||||||
|
@ -2239,7 +2233,6 @@ std::string json::parser::codePointToUTF8(unsigned int codePoint)
|
||||||
// we initialize all strings with their final length
|
// we initialize all strings with their final length
|
||||||
// (e.g. 1 to 4 bytes) to save the reallocations.
|
// (e.g. 1 to 4 bytes) to save the reallocations.
|
||||||
|
|
||||||
|
|
||||||
if (codePoint <= 0x7f)
|
if (codePoint <= 0x7f)
|
||||||
{
|
{
|
||||||
// it's just a ASCII compatible codePoint,
|
// it's just a ASCII compatible codePoint,
|
||||||
|
@ -2309,15 +2302,15 @@ unsigned int json::parser::parse4HexCodePoint()
|
||||||
// make a string that can hold the pair
|
// make a string that can hold the pair
|
||||||
std::string hexCode(4, ' ');
|
std::string hexCode(4, ' ');
|
||||||
|
|
||||||
for(; pos_ < startPos + 4; pos_++)
|
for (; pos_ < startPos + 4; pos_++)
|
||||||
{
|
{
|
||||||
// no boundary check here as we already checked above
|
// no boundary check here as we already checked above
|
||||||
char currentChar = buffer_[pos_];
|
char currentChar = buffer_[pos_];
|
||||||
|
|
||||||
// check if we have a hexadecimal character
|
// check if we have a hexadecimal character
|
||||||
if ( (currentChar >= '0' && currentChar <= '9')
|
if ((currentChar >= '0' and currentChar <= '9')
|
||||||
|| (currentChar >= 'a' && currentChar <= 'f')
|
or (currentChar >= 'a' and currentChar <= 'f')
|
||||||
|| (currentChar >= 'A' && currentChar <= 'F'))
|
or (currentChar >= 'A' and currentChar <= 'F'))
|
||||||
{
|
{
|
||||||
// all is well, we have valid hexadecimal chars
|
// all is well, we have valid hexadecimal chars
|
||||||
// so we copy that char into our string
|
// so we copy that char into our string
|
||||||
|
@ -2358,8 +2351,7 @@ std::string json::parser::parseUnicodeEscape()
|
||||||
// parse the hex first hex values
|
// parse the hex first hex values
|
||||||
unsigned int firstCodePoint = parse4HexCodePoint();
|
unsigned int firstCodePoint = parse4HexCodePoint();
|
||||||
|
|
||||||
|
if (firstCodePoint >= 0xD800 and firstCodePoint <= 0xDBFF)
|
||||||
if (firstCodePoint >= 0xD800 && firstCodePoint <= 0xDBFF)
|
|
||||||
{
|
{
|
||||||
// we found invalid code points, which means we either have a malformed
|
// we found invalid code points, which means we either have a malformed
|
||||||
// input or we found a high surrogate.
|
// input or we found a high surrogate.
|
||||||
|
@ -2372,25 +2364,27 @@ std::string json::parser::parseUnicodeEscape()
|
||||||
// the method does boundary checking for us, so no need to do that here
|
// the method does boundary checking for us, so no need to do that here
|
||||||
unsigned secondCodePoint = parse4HexCodePoint();
|
unsigned secondCodePoint = parse4HexCodePoint();
|
||||||
// ok, we have a low surrogate, check if it is a valid one
|
// ok, we have a low surrogate, check if it is a valid one
|
||||||
if (secondCodePoint >= 0xDC00 && secondCodePoint <= 0xDFFF)
|
if (secondCodePoint >= 0xDC00 and secondCodePoint <= 0xDFFF)
|
||||||
{
|
{
|
||||||
// calculate the code point from the pair according to the spec
|
// calculate the code point from the pair according to the spec
|
||||||
unsigned int finalCodePoint =
|
unsigned int finalCodePoint =
|
||||||
// high surrogate occupies the most significant 22 bits
|
// high surrogate occupies the most significant 22 bits
|
||||||
(firstCodePoint << 10)
|
(firstCodePoint << 10)
|
||||||
// low surrogate occupies the least significant 15 bits
|
// low surrogate occupies the least significant 15 bits
|
||||||
+ secondCodePoint
|
+ secondCodePoint
|
||||||
// there is still the 0xD800, 0xDC00 and 0x10000 noise in
|
// there is still the 0xD800, 0xDC00 and 0x10000 noise in
|
||||||
// the result
|
// the result
|
||||||
// so we have to substract with:
|
// so we have to substract with:
|
||||||
// (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00
|
// (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00
|
||||||
- 0x35FDC00;
|
- 0x35FDC00;
|
||||||
|
|
||||||
// we transform the calculated point into UTF-8
|
// we transform the calculated point into UTF-8
|
||||||
return codePointToUTF8(finalCodePoint);
|
return codePointToUTF8(finalCodePoint);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
error("missing low surrogate");
|
error("missing low surrogate");
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
// We have Form 1, so we just interpret the XXXX as a code point
|
// We have Form 1, so we just interpret the XXXX as a code point
|
||||||
|
|
|
@ -418,11 +418,11 @@ class json
|
||||||
/// read the next character, stripping whitespace
|
/// read the next character, stripping whitespace
|
||||||
bool next();
|
bool next();
|
||||||
/// raise an exception with an error message
|
/// raise an exception with an error message
|
||||||
inline void error(const std::string&) __attribute__((noreturn));
|
inline void error(const std::string&) const __attribute__((noreturn));
|
||||||
/// parse a quoted string
|
/// parse a quoted string
|
||||||
inline std::string parseString();
|
inline std::string parseString();
|
||||||
/// transforms a unicode codepoint to it's UTF-8 presentation
|
/// transforms a unicode codepoint to it's UTF-8 presentation
|
||||||
std::string codePointToUTF8(unsigned int codePoint);
|
std::string codePointToUTF8(unsigned int codePoint) const;
|
||||||
/// parses 4 hex characters that represent a unicode code point
|
/// parses 4 hex characters that represent a unicode code point
|
||||||
inline unsigned int parse4HexCodePoint();
|
inline unsigned int parse4HexCodePoint();
|
||||||
/// parses \uXXXX[\uXXXX] unicode escape characters
|
/// parses \uXXXX[\uXXXX] unicode escape characters
|
||||||
|
|
Loading…
Reference in a new issue