small tidy up after pull request #22

- removed IDE leftovers - adjusted const’ness of some functions - style guide
2015-01-11 15:16:14 +01:00 · 2015-01-11 15:16:14 +01:00 · bd05c7a18a
commit bd05c7a18a
parent 6533b33387
9 changed files with 247 additions and 107 deletions
--- a/.gitignore
+++ b/.gitignore
@ -46,3 +46,5 @@ CMakeFiles
 libjson.a
 Testing
 .idea
--- a/.idea/codeStyleSettings.xml
+++ b/.idea/codeStyleSettings.xml
@ -1,35 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="ProjectCodeStyleSettingsManager">
    <option name="PER_PROJECT_SETTINGS">
      <value>
        <Objective-C-extensions>
          <option name="GENERATE_INSTANCE_VARIABLES_FOR_PROPERTIES" value="ASK" />
          <option name="RELEASE_STYLE" value="IVAR" />
          <file>
            <option name="com.jetbrains.objc.util.OCDeclarationKind" value="Import" />
            <option name="com.jetbrains.objc.util.OCDeclarationKind" value="Macro" />
            <option name="com.jetbrains.objc.util.OCDeclarationKind" value="Typedef" />
            <option name="com.jetbrains.objc.util.OCDeclarationKind" value="Struct" />
            <option name="com.jetbrains.objc.util.OCDeclarationKind" value="Enum" />
            <option name="com.jetbrains.objc.util.OCDeclarationKind" value="ClassPredef" />
            <option name="com.jetbrains.objc.util.OCDeclarationKind" value="Constant" />
            <option name="com.jetbrains.objc.util.OCDeclarationKind" value="Global" />
            <option name="com.jetbrains.objc.util.OCDeclarationKind" value="FunctionPredecl" />
            <option name="com.jetbrains.objc.util.OCDeclarationKind" value="Function" />
            <option name="com.jetbrains.objc.util.OCDeclarationKind" value="Class" />
          </file>
          <class>
            <option name="com.jetbrains.objc.util.OCDeclarationKind" value="Property" />
            <option name="com.jetbrains.objc.util.OCDeclarationKind" value="Synthesize" />
            <option name="com.jetbrains.objc.util.OCDeclarationKind" value="InitMethod" />
            <option name="com.jetbrains.objc.util.OCDeclarationKind" value="StaticMethod" />
            <option name="com.jetbrains.objc.util.OCDeclarationKind" value="InstanceMethod" />
            <option name="com.jetbrains.objc.util.OCDeclarationKind" value="DeallocMethod" />
          </class>
        </Objective-C-extensions>
      </value>
    </option>
    <option name="PREFERRED_PROJECT_CODE_STYLE" value="Default (1)" />
  </component>
 </project>
--- a/.idea/json.iml
+++ b/.idea/json.iml
@ -1,8 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <module type="CPP_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
 </module>
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -1,5 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$" />
  <component name="ProjectRootManager" version="2" />
 </project>
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@ -1,8 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/json.iml" filepath="$PROJECT_DIR$/.idea/json.iml" />
    </modules>
  </component>
 </project>
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@ -1,6 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
 </project>
--- a/header_only/json.h
+++ b/header_only/json.h
@ -418,9 +418,15 @@ class json
        /// read the next character, stripping whitespace
        bool next();
        /// raise an exception with an error message
-        inline void error(const std::string&) __attribute__((noreturn));
+        inline void error(const std::string&) const __attribute__((noreturn));
        /// parse a quoted string
        inline std::string parseString();
        /// transforms a unicode codepoint to its UTF-8 representation
        std::string codePointToUTF8(unsigned int codePoint) const;
        /// parses 4 hex characters that represent a unicode code point
        inline unsigned int parse4HexCodePoint();
        /// parses \uXXXX[\uXXXX] unicode escape characters
        inline std::string parseUnicodeEscape();
        /// parse a Boolean "true"
        inline void parseTrue();
        /// parse a Boolean "false"
@ -2434,9 +2440,9 @@ json json::parser::parse()
            // remember position of number's first character
            const auto _firstpos_ = pos_ - 1;
-            while (next() and (std::isdigit(current_) || current_ == '.'
+            while (next() and (std::isdigit(current_) or current_ == '.'
-                               || current_ == 'e' || current_ == 'E'
+                               or current_ == 'e' or current_ == 'E'
-                               || current_ == '+' || current_ == '-'));
+                               or current_ == '+' or current_ == '-'));
            try
            {
@ -2512,7 +2518,7 @@ the error message \p msg), and the last read token.
@exception std::invalid_argument whenever the function is called
 */
-void json::parser::error(const std::string& msg)
+void json::parser::error(const std::string& msg) const
 {
    throw std::invalid_argument("parse error at position " +
                                std::to_string(pos_) + ": " + msg +
@ -2542,14 +2548,19 @@ std::string json::parser::parseString()
    // the result of the parse process
    std::string result;
-    // iterate with pos_ over the whole string
+    // iterate with pos_ over the whole input until we found the end and return
    // or we exit via error()
    for (; pos_ < buffer_.size(); pos_++)
    {
        char currentChar = buffer_[pos_];
-        // uneven amount of backslashes means the user wants to escape something
+        if (not evenAmountOfBackslashes)
        if (!evenAmountOfBackslashes)
        {
            // uneven amount of backslashes means the user wants to escape
            // something so we know there is a case such as '\X' or '\\\X' but
            // we don't know yet what X is.
            // at this point in the code, the currentChar has the value of X.
            // slash, backslash and quote are copied as is
            if (currentChar == '/' or currentChar == '\\' or currentChar == '"')
            {
@ -2557,8 +2568,8 @@ std::string json::parser::parseString()
            }
            else
            {
-                // all other characters are replaced by their respective
+                // all other characters are replaced by their respective special
-                // special character
+                // character
                switch (currentChar)
                {
                    case 't':
@ -2586,12 +2597,26 @@ std::string json::parser::parseString()
                        result += '\r';
                        break;
                    }
                    case 'u':
                    {
                        // \uXXXX[\uXXXX] is used for escaping unicode, which
                        // has its own subroutine.
                        result += parseUnicodeEscape();
                        // the parsing process has brought us one step behind
                        // the unicode escape sequence:
                        // \uXXXX
                        //       ^
                        // we need to go one character back or the parser would
                        // skip the character we are currently pointing at as
                        // the for-loop will increment pos_ after this iteration
                        pos_--;
                        break;
                    }
                    default:
                    {
-                        error("expected one of \\, /, b, f, n, r, t behind backslash.");
+                        error("expected one of \\, /, b, f, n, r, t, u behind backslash.");
                    }
                }
                // TODO implement \uXXXX
            }
        }
        else
@ -2610,7 +2635,7 @@ std::string json::parser::parseString()
            }
            else if (currentChar != '\\')
            {
-                // All non-backslash characters are added to the end of the
+                // all non-backslash characters are added to the end of the
                // result string. The only backslashes we want in the result
                // are the ones that are escaped (which happens above).
                result += currentChar;
@ -2638,6 +2663,187 @@ std::string json::parser::parseString()
    error("expected '\"'");
 }
 /*!
 Turns a code point into it's UTF-8 representation.
 You should only pass numbers < 0x10ffff into this function
 (everything else is a invalid code point).
@return the UTF-8 representation of the given code point
 */
 std::string json::parser::codePointToUTF8(unsigned int codePoint) const
 {
    // this method contains a lot of bit manipulations to
    // build the bytes for UTF-8.
    // the '(... >> S) & 0xHH'-patterns are used to retrieve
    // certain bits from the code points.
    // all static casts in this method have boundary checks
    // we initialize all strings with their final length
    // (e.g. 1 to 4 bytes) to save the reallocations.
    if (codePoint <= 0x7f)
    {
        // it's just a ASCII compatible codePoint,
        // so we just interpret the point as a character
        // and return ASCII
        return std::string(1, static_cast<char>(codePoint));
    }
    // if true, we need two bytes to encode this as UTF-8
    else if (codePoint <= 0x7ff)
    {
        // the 0xC0 enables the two most significant two bits
        // to make this a two-byte UTF-8 character.
        std::string result(2, static_cast<char>(0xC0 | ((codePoint >> 6) & 0x1F)));
        result[1] = static_cast<char>(0x80 | (codePoint & 0x3F));
        return result;
    }
    // if true, now we need three bytes to encode this as UTF-8
    else if (codePoint <= 0xffff)
    {
        // the 0xE0 enables the three most significant two bits
        // to make this a three-byte UTF-8 character.
        std::string result(3, static_cast<char>(0xE0 | ((codePoint >> 12) & 0x0F)));
        result[1] = static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F));
        result[2] = static_cast<char>(0x80 | (codePoint & 0x3F));
        return result;
    }
    // if true, we need maximal four bytes to encode this as UTF-8
    else if (codePoint <= 0x10ffff)
    {
        // the 0xE0 enables the four most significant two bits
        // to make this a three-byte UTF-8 character.
        std::string result(4, static_cast<char>(0xF0 | ((codePoint >> 18) & 0x07)));
        result[1] = static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F));
        result[2] = static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F));
        result[3] = static_cast<char>(0x80 | (codePoint & 0x3F));
        return result;
    }
    else
    {
        // Can't be tested without direct access to this private method.
        std::string errorMessage = "Invalid codePoint: ";
        errorMessage += codePoint;
        error(errorMessage);
    }
 }
 /*!
 Reads exactly four hexadecimal digits from the buffer and returns the number
 they represent.
@return the value (0x0000 to 0xFFFF) encoded by the four hexadecimal digits
@pre  pos_ is pointing to the first of the 4 hexadecimal characters.
@post pos_ is pointing to the character after the 4 hexadecimal characters.
@exception std::invalid_argument (raised via error()) if fewer than four
 characters are left in the buffer or one of them is not a hexadecimal digit
 */
 unsigned int json::parser::parse4HexCodePoint()
 {
    // one past the last of the four digits we are about to read
    const auto endPos = pos_ + 4;
    // all four digits have to lie inside of the buffer
    if (endPos > buffer_.size())
    {
        error("Got end of input while parsing unicode escape sequence \\uXXXX");
    }
    // the number is assembled digit by digit, four bits at a time
    unsigned int codePoint = 0;
    while (pos_ < endPos)
    {
        // no boundary check here as we already checked above
        const char digitChar = buffer_[pos_];
        unsigned int digitValue = 0;
        if (digitChar >= '0' and digitChar <= '9')
        {
            digitValue = static_cast<unsigned int>(digitChar - '0');
        }
        else if (digitChar >= 'a' and digitChar <= 'f')
        {
            digitValue = static_cast<unsigned int>(digitChar - 'a') + 10;
        }
        else if (digitChar >= 'A' and digitChar <= 'F')
        {
            digitValue = static_cast<unsigned int>(digitChar - 'A') + 10;
        }
        else
        {
            // pos_ still points at the offending character here
            error("Found non-hexadecimal character in unicode escape sequence!");
        }
        // four hex digits can't occupy more than 16 bits, so this never
        // overflows
        codePoint = (codePoint << 4) | digitValue;
        ++pos_;
    }
    return codePoint;
 }
 /*!
 Parses the unicode escape codes as defined in the ECMA-404.
 The escape sequence has two forms:
 1. \uXXXX
 2. \uXXXX\uYYYY
 where X and Y are a hexadecimal character (a-zA-Z0-9).
 Form 1 just contains the unicode code point in the hexadecimal number XXXX.
 Form 2 is encoding a UTF-16 surrogate pair. The high surrogate is XXXX, the low
 surrogate is YYYY.
@return the UTF-8 character this unicode escape sequence escaped.
@pre  pos_ is pointing at at the 'u' behind the first backslash.
@post pos_ is pointing at the character behind the last X (or Y in form 2).
 */
 std::string json::parser::parseUnicodeEscape()
 {
    // jump to the first hex value
    pos_++;
    // parse the hex first hex values
    unsigned int firstCodePoint = parse4HexCodePoint();
    if (firstCodePoint >= 0xD800 and firstCodePoint <= 0xDBFF)
    {
        // we found invalid code points, which means we either have a malformed
        // input or we found a high surrogate.
        // we can only find out by seeing if the next character also wants to
        // encode a unicode character (so, we have the \uXXXX\uXXXX case here).
        // jump behind the next \u
        pos_ += 2;
        // try to parse the next hex values.
        // the method does boundary checking for us, so no need to do that here
        unsigned secondCodePoint = parse4HexCodePoint();
        // ok, we have a low surrogate, check if it is a valid one
        if (secondCodePoint >= 0xDC00 and secondCodePoint <= 0xDFFF)
        {
            // calculate the code point from the pair according to the spec
            unsigned int finalCodePoint =
                // high surrogate occupies the most significant 22 bits
                (firstCodePoint << 10)
                // low surrogate occupies the least significant 15 bits
                + secondCodePoint
                // there is still the 0xD800, 0xDC00 and 0x10000 noise in
                // the result
                // so we have to substract with:
                // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00
                - 0x35FDC00;
            // we transform the calculated point into UTF-8
            return codePointToUTF8(finalCodePoint);
        }
        else
        {
            error("missing low surrogate");
        }
    }
    // We have Form 1, so we just interpret the XXXX as a code point
    return codePointToUTF8(firstCodePoint);
 }
 /*!
 This function is called in case a \p "t" is read in the main parse function
@ref parse. In the standard, the \p "true" token is the only candidate, so the
--- a/src/json.cc
+++ b/src/json.cc
@ -1988,9 +1988,9 @@ json json::parser::parse()
            // remember position of number's first character
            const auto _firstpos_ = pos_ - 1;
-            while (next() and (std::isdigit(current_) || current_ == '.'
+            while (next() and (std::isdigit(current_) or current_ == '.'
-                               || current_ == 'e' || current_ == 'E'
+                               or current_ == 'e' or current_ == 'E'
-                               || current_ == '+' || current_ == '-'));
+                               or current_ == '+' or current_ == '-'));
            try
            {
@ -2066,7 +2066,7 @@ the error message \p msg), and the last read token.
@exception std::invalid_argument whenever the function is called
 */
-void json::parser::error(const std::string& msg)
+void json::parser::error(const std::string& msg) const
 {
    throw std::invalid_argument("parse error at position " +
                                std::to_string(pos_) + ": " + msg +
@ -2102,7 +2102,7 @@ std::string json::parser::parseString()
    {
        char currentChar = buffer_[pos_];
-        if (!evenAmountOfBackslashes)
+        if (not evenAmountOfBackslashes)
        {
            // uneven amount of backslashes means the user wants to escape
            // something so we know there is a case such as '\X' or '\\\X' but
@ -2110,9 +2110,7 @@ std::string json::parser::parseString()
            // at this point in the code, the currentChar has the value of X.
            // slash, backslash and quote are copied as is
-            if (   currentChar == '/'
+            if (currentChar == '/' or currentChar == '\\' or currentChar == '"')
                || currentChar == '\\'
                || currentChar == '"')
            {
                result += currentChar;
            }
@ -2221,12 +2219,8 @@ You should only pass numbers < 0x10ffff into this function
 (everything else is a invalid code point).
@return the UTF-8 representation of the given code point
@pre  This method isn't accessing the members of the parser
@post This method isn't accessing the members of the parser
 */
-std::string json::parser::codePointToUTF8(unsigned int codePoint)
+std::string json::parser::codePointToUTF8(unsigned int codePoint) const
 {
    // this method contains a lot of bit manipulations to
    // build the bytes for UTF-8.
@ -2239,7 +2233,6 @@ std::string json::parser::codePointToUTF8(unsigned int codePoint)
    // we initialize all strings with their final length
    // (e.g. 1 to 4 bytes) to save the reallocations.
    if (codePoint <= 0x7f)
    {
        // it's just a ASCII compatible codePoint,
@ -2309,15 +2302,15 @@ unsigned int json::parser::parse4HexCodePoint()
    // make a string that can hold the pair
    std::string hexCode(4, ' ');
-    for(; pos_ < startPos + 4; pos_++)
+    for (; pos_ < startPos + 4; pos_++)
    {
        // no boundary check here as we already checked above
        char currentChar = buffer_[pos_];
        // check if we have a hexadecimal character
-        if (   (currentChar >= '0' && currentChar <= '9')
+        if ((currentChar >= '0' and currentChar <= '9')
-            || (currentChar >= 'a' && currentChar <= 'f')
+                or (currentChar >= 'a' and currentChar <= 'f')
-            || (currentChar >= 'A' && currentChar <= 'F'))
+                or (currentChar >= 'A' and currentChar <= 'F'))
        {
            // all is well, we have valid hexadecimal chars
            // so we copy that char into our string
@ -2358,8 +2351,7 @@ std::string json::parser::parseUnicodeEscape()
    // parse the hex first hex values
    unsigned int firstCodePoint = parse4HexCodePoint();
-
+    if (firstCodePoint >= 0xD800 and firstCodePoint <= 0xDBFF)
    if (firstCodePoint >= 0xD800 && firstCodePoint <= 0xDBFF)
    {
        // we found invalid code points, which means we either have a malformed
        // input or we found a high surrogate.
@ -2372,25 +2364,27 @@ std::string json::parser::parseUnicodeEscape()
        // the method does boundary checking for us, so no need to do that here
        unsigned secondCodePoint = parse4HexCodePoint();
        // ok, we have a low surrogate, check if it is a valid one
-        if (secondCodePoint >= 0xDC00 && secondCodePoint <= 0xDFFF)
+        if (secondCodePoint >= 0xDC00 and secondCodePoint <= 0xDFFF)
        {
            // calculate the code point from the pair according to the spec
            unsigned int finalCodePoint =
-                    // high surrogate occupies the most significant 22 bits
+                // high surrogate occupies the most significant 22 bits
-                    (firstCodePoint << 10)
+                (firstCodePoint << 10)
-                    // low surrogate occupies the least significant 15 bits
+                // low surrogate occupies the least significant 15 bits
-                    + secondCodePoint
+                + secondCodePoint
-                    // there is still the 0xD800, 0xDC00 and 0x10000 noise in
+                // there is still the 0xD800, 0xDC00 and 0x10000 noise in
-                    // the result
+                // the result
-                    // so we have to substract with:
+                // so we have to substract with:
-                    // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00
+                // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00
-                    - 0x35FDC00;
+                - 0x35FDC00;
            // we transform the calculated point into UTF-8
            return codePointToUTF8(finalCodePoint);
        }
        else
        {
            error("missing low surrogate");
        }
    }
    // We have Form 1, so we just interpret the XXXX as a code point
--- a/src/json.h
+++ b/src/json.h
@ -418,11 +418,11 @@ class json
        /// read the next character, stripping whitespace
        bool next();
        /// raise an exception with an error message
-        inline void error(const std::string&) __attribute__((noreturn));
+        inline void error(const std::string&) const __attribute__((noreturn));
        /// parse a quoted string
        inline std::string parseString();
        /// transforms a unicode codepoint to it's UTF-8 presentation
-        std::string codePointToUTF8(unsigned int codePoint);
+        std::string codePointToUTF8(unsigned int codePoint) const;
        /// parses 4 hex characters that represent a unicode code point
        inline unsigned int parse4HexCodePoint();
        /// parses \uXXXX[\uXXXX] unicode escape characters