From 90adf6ec20c581055d748acc1b99b61af58365a7 Mon Sep 17 00:00:00 2001
From: Perry Kundert <perry@hardconsulting.com>
Date: Mon, 2 Oct 2017 08:50:15 -0700
Subject: [PATCH 01/17] Simplify get_token_string, unnecessary buffering,
 handle Byte Order Mark

---
 src/json.hpp | 176 +++++++++++++++++----------------------------------
 1 file changed, 59 insertions(+), 117 deletions(-)
diff --git a/src/json.hpp b/src/json.hpp
index 24773823..942acb87 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -1398,119 +1398,60 @@ constexpr T static_const<T>::value;
 struct input_adapter_protocol
 {
     virtual int get_character() = 0;
-    virtual std::string read(std::size_t offset, std::size_t length) = 0;
     virtual ~input_adapter_protocol() = default;
 };
 
 /// a type to simplify interfaces
 using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
 
-/// input adapter for cached stream input
-template<std::size_t BufferSize>
-class cached_input_stream_adapter : public input_adapter_protocol
+
+/// input adapter for a (caching) istream.  Ignores a UFT Byte Order Mark at beginning of input.
+class input_stream_adapter : public input_adapter_protocol
 {
   public:
-    explicit cached_input_stream_adapter(std::istream& i)
-        : is(i), start_position(is.tellg())
+    explicit input_stream_adapter(std::istream& i)
+        : is(i)
     {
-        fill_buffer();
-
-        // skip byte order mark
-        if (fill_size >= 3 and buffer[0] == '\xEF' and buffer[1] == '\xBB' and buffer[2] == '\xBF')
-        {
-            buffer_pos += 3;
-            processed_chars += 3;
-        }
+	// Ignore Byte Order Mark at start of input
+	int c;
+	if (( c = get_character() ) == 0xEF )
+	{
+	    if (( c = get_character() ) == 0xBB )
+	    {
+		if (( c = get_character() ) == 0xBF )
+		{
+		    return; // Ignore BOM
+		}
+		else if ( c != std::char_traits<char>::eof() )
+		{
+		    is.unget();
+		}
+		is.putback( '\xBB' );
+	    }
+	    else if ( c != std::char_traits<char>::eof() )
+	    {
+		is.unget();
+	    }
+	    is.putback( '\xEF' );
+	}
+	else if ( c != std::char_traits<char>::eof() )
+	{
+	    is.unget(); // Not BOM.  Process as usual.
+	}
     }
 
-    ~cached_input_stream_adapter() override
-    {
-        // clear stream flags
-        is.clear();
-        // We initially read a lot of characters into the buffer, and we may
-        // not have processed all of them. Therefore, we need to "rewind" the
-        // stream after the last processed char.
-        is.seekg(start_position);
-        is.ignore(static_cast<std::streamsize>(processed_chars));
-        // clear stream flags
-        is.clear();
-    }
+    ~input_stream_adapter() override {}
 
     int get_character() override
     {
-        // check if refilling is necessary and possible
-        if (buffer_pos == fill_size and not eof)
-        {
-            fill_buffer();
-
-            // check and remember that filling did not yield new input
-            if (fill_size == 0)
-            {
-                eof = true;
-                return std::char_traits<char>::eof();
-            }
-
-            // the buffer is ready
-            buffer_pos = 0;
-        }
-
-        ++processed_chars;
-        assert(buffer_pos < buffer.size());
-        return buffer[buffer_pos++] & 0xFF;
-    }
-
-    std::string read(std::size_t offset, std::size_t length) override
-    {
-        // create buffer
-        std::string result(length, '\0');
-
-        // save stream position
-        const auto current_pos = is.tellg();
-        // save stream flags
-        const auto flags = is.rdstate();
-
-        // clear stream flags
-        is.clear();
-        // set stream position
-        is.seekg(static_cast<std::streamoff>(offset));
-        // read bytes
-        is.read(&result[0], static_cast<std::streamsize>(length));
-
-        // reset stream position
-        is.seekg(current_pos);
-        // reset stream flags
-        is.setstate(flags);
-
-        return result;
+	int c = is.get();
+	return c  == std::char_traits<char>::eof() ? c : ( c & 0xFF );
     }
 
   private:
-    void fill_buffer()
-    {
-        // fill
-        is.read(buffer.data(), static_cast<std::streamsize>(buffer.size()));
-        // store number of bytes in the buffer
-        fill_size = static_cast<size_t>(is.gcount());
-    }
 
     /// the associated input stream
     std::istream& is;
-
-    /// chars returned via get_character()
-    std::size_t processed_chars = 0;
-    /// chars processed in the current buffer
-    std::size_t buffer_pos = 0;
-
-    /// whether stream reached eof
-    bool eof = false;
-    /// how many chars have been copied to the buffer by last (re)fill
-    std::size_t fill_size = 0;
-
-    /// position of the stream when we started
-    const std::streampos start_position;
-
-    /// internal buffer
-    std::array<char, BufferSize> buffer{{}};
 };
 
 /// input adapter for buffer input
@@ -1541,13 +1482,6 @@ class input_buffer_adapter : public input_adapter_protocol
         return std::char_traits<char>::eof();
     }
 
-    std::string read(std::size_t offset, std::size_t length) override
-    {
-        // avoid reading too many characters
-        const auto max_length = static_cast<size_t>(limit - start);
-        return std::string(start + offset, (std::min)(length, max_length - offset));
-    }
-
   private:
     /// pointer to the current character
     const char* cursor;
@@ -1564,11 +1498,11 @@ class input_adapter
 
     /// input adapter for input stream
     input_adapter(std::istream& i)
-        : ia(std::make_shared<cached_input_stream_adapter<16384>>(i)) {}
+        : ia(std::make_shared<input_stream_adapter>(i)) {}
 
     /// input adapter for input stream
     input_adapter(std::istream&& i)
-        : ia(std::make_shared<cached_input_stream_adapter<16384>>(i)) {}
+        : ia(std::make_shared<input_stream_adapter>(i)) {}
 
     /// input adapter for buffer
     template<typename CharT,
@@ -2624,8 +2558,7 @@ scan_number_any2:
 scan_number_done:
         // unget the character after the number (we only read it to know that
         // we are done scanning a number)
-        --chars_read;
-        next_unget = true;
+	unget();
 
         // terminate token
         add('\0');
@@ -2702,21 +2635,33 @@ scan_number_done:
     // input management
     /////////////////////
 
-    /// reset yytext
+    /// reset yytext; current character is beginning of token
     void reset() noexcept
     {
         yylen = 0;
         start_pos = chars_read - 1;
+	token_string = static_cast<char>( current );
     }
 
     /// get a character from the input
     int get()
     {
         ++chars_read;
-        return next_unget ? (next_unget = false, current)
-               : (current = ia->get_character());
+	int c = next_unget ? (next_unget = false, current)
+	    : (current = ia->get_character());
+	token_string += static_cast<char>( c );
+	return c;
     }
 
+    /// unget current character (return it again on next get)
+    void unget()
+    {
+        --chars_read;
+        next_unget = true;
+	if (token_string.size() > 0)
+	    token_string.resize( token_string.size() - 1 );
+    }
+	
     /// add a character to yytext
     void add(int c)
     {
@@ -2774,19 +2719,14 @@ scan_number_done:
     /// return the last read token (for errors only)
     std::string get_token_string() const
     {
-        // get the raw byte sequence of the last token
-        std::string s = ia->read(start_pos, chars_read - start_pos);
-
         // escape control characters
         std::string result;
-        for (auto c : s)
+        for (auto c : token_string)
         {
-            if (c == '\0' or c == std::char_traits<char>::eof())
-            {
-                // ignore EOF
-                continue;
-            }
-            else if ('\x00' <= c and c <= '\x1f')
+	    if ( c == std::char_traits<char>::eof() ) {
+		continue;
+	    }
+	    else if ('\x00' <= c and c <= '\x1f')
             {
                 // escape control characters
                 std::stringstream ss;
@@ -2892,6 +2832,8 @@ scan_number_done:
     std::size_t chars_read = 0;
     /// the start position of the current token
     std::size_t start_pos = 0;
+    /// raw input token string (for error messages)
+    std::string token_string = "";
 
     /// buffer for variable-length tokens (numbers, strings)
     std::vector<char> yytext = std::vector<char>(1024, '\0');

From f585fe4eece44413982900f3b1b724cb7eabe541 Mon Sep 17 00:00:00 2001
From: Perry Kundert <perry@hardconsulting.com>
Date: Mon, 2 Oct 2017 14:17:23 -0700
Subject: [PATCH 02/17] Test to confirm parsing of multiple JSON records in a
 istream #367

---
 test/src/unit-regression.cpp | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/test/src/unit-regression.cpp b/test/src/unit-regression.cpp
index 0c0d148d..2d30a52c 100644
--- a/test/src/unit-regression.cpp
+++ b/test/src/unit-regression.cpp
@@ -1233,4 +1233,24 @@ TEST_CASE("regression tests")
                               "[json.exception.type_error.302] type must be array, but is null");
         }
     }
+
+    SECTION("issue #367 - Behavior of operator>> should more closely resemble that of built-in overloads.")
+    {
+	SECTION("example 1")
+	{
+	    std::istringstream i1_2_3( "{\"first\": \"one\" }{\"second\": \"two\"}3" );
+	    json j1, j2, j3;
+	    i1_2_3 >> j1;
+	    i1_2_3 >> j2;
+	    i1_2_3 >> j3;
+
+	    std::map<std::string,std::string> m1 = j1;
+	    std::map<std::string,std::string> m2 = j2;
+	    int i3 = j3;
+
+	    CHECK( m1 == ( std::map<std::string,std::string> {{ "first",  "one" }} ));
+	    CHECK( m2 == ( std::map<std::string,std::string> {{ "second", "two" }} ));
+	    CHECK( i3 == 3 );
+	}
+    }
 }

From 12efeadc2eead552c5155352b84cd4dd899e169c Mon Sep 17 00:00:00 2001
From: Perry Kundert <perry@hardconsulting.com>
Date: Tue, 3 Oct 2017 08:49:39 -0700
Subject: [PATCH 03/17] Further simplify istream handling; use native unget

---
 src/json.hpp | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/json.hpp b/src/json.hpp
index 942acb87..160fa4eb 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -1398,6 +1398,7 @@ constexpr T static_const<T>::value;
 struct input_adapter_protocol
 {
     virtual int get_character() = 0;
+    virtual void unget_character() = 0;
     virtual ~input_adapter_protocol() = default;
 };
 
@@ -1448,6 +1449,10 @@ class input_stream_adapter : public input_adapter_protocol
 	return c  == std::char_traits<char>::eof() ? c : ( c & 0xFF );
     }
 
+    void unget_character() override
+    {
+	is.unget();
+    }
   private:
 
     /// the associated input stream
@@ -1482,6 +1487,14 @@ class input_buffer_adapter : public input_adapter_protocol
         return std::char_traits<char>::eof();
     }
 
+    void unget_character() noexcept override
+    {
+	if (JSON_LIKELY(cursor > 0))
+	{
+	    --cursor;
+	}
+    }
+
   private:
     /// pointer to the current character
     const char* cursor;
@@ -2647,8 +2660,8 @@ scan_number_done:
     int get()
     {
         ++chars_read;
-	int c = next_unget ? (next_unget = false, current)
-	    : (current = ia->get_character());
+
+	int c = current = ia->get_character();
 	token_string += static_cast<char>( c );
 	return c;
     }
@@ -2657,7 +2670,7 @@ scan_number_done:
     void unget()
     {
         --chars_read;
-        next_unget = true;
+
 	if (token_string.size() > 0)
 	    token_string.resize( token_string.size() - 1 );
     }
@@ -2825,9 +2838,6 @@ scan_number_done:
     /// the current character
     int current = std::char_traits<char>::eof();
 
-    /// whether get() should return the last character again
-    bool next_unget = false;
-
     /// the number of characters read
     std::size_t chars_read = 0;
     /// the start position of the current token

From 14ca1f6f09e8da48e1191a527bba20ec86709c83 Mon Sep 17 00:00:00 2001
From: Perry Kundert <perry@hardconsulting.com>
Date: Tue, 3 Oct 2017 14:38:38 -0700
Subject: [PATCH 04/17] Restore istream performance #764 o Use std::streambuf
 I/O instead of std::istream; does not maintain   (unused) istream flags. o
 Further simplify get/unget handling. o Restore original handling of NUL in
 input stream; ignored during   token_string escaping.

---
 src/json.hpp | 111 +++++++++++++++++++++++++++------------------------
 1 file changed, 58 insertions(+), 53 deletions(-)

diff --git a/src/json.hpp b/src/json.hpp
index 160fa4eb..516f2bbb 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -1410,48 +1410,51 @@ using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
 class input_stream_adapter : public input_adapter_protocol
 {
   public:
+    ~input_stream_adapter() override
+    {
+        // clear stream flags; we use underlying streambuf I/O, do not maintain ifstream flags
+        is.clear();
+    }
     explicit input_stream_adapter(std::istream& i)
         : is(i)
     {
-	// Ignore Byte Order Mark at start of input
-	int c;
-	if (( c = get_character() ) == 0xEF )
-	{
-	    if (( c = get_character() ) == 0xBB )
-	    {
-		if (( c = get_character() ) == 0xBF )
-		{
-		    return; // Ignore BOM
-		}
-		else if ( c != std::char_traits<char>::eof() )
-		{
-		    is.unget();
-		}
-		is.putback( '\xBB' );
-	    }
-	    else if ( c != std::char_traits<char>::eof() )
-	    {
-		is.unget();
-	    }
-	    is.putback( '\xEF' );
-	}
-	else if ( c != std::char_traits<char>::eof() )
-	{
-	    is.unget(); // Not BOM.  Process as usual.
-	}
+        // Ignore Byte Order Mark at start of input
+        int c;
+        if (( c = get_character() ) == 0xEF )
+        {
+            if (( c = get_character() ) == 0xBB )
+            {
+                if (( c = get_character() ) == 0xBF )
+                {
+                    return; // Ignore BOM
+                }
+                else if ( c != std::char_traits<char>::eof() )
+                {
+                    is.unget();
+                }
+                is.putback( '\xBB' );
+            }
+            else if ( c != std::char_traits<char>::eof() )
+            {
+                is.unget();
+            }
+            is.putback( '\xEF' );
+        }
+        else if ( c != std::char_traits<char>::eof() )
+        {
+            is.unget(); // Not BOM.  Process as usual.
+        }
     }
 
-    ~input_stream_adapter() override {}
-
     int get_character() override
     {
-	int c = is.get();
-	return c  == std::char_traits<char>::eof() ? c : ( c & 0xFF );
+        int c = is.rdbuf()->sbumpc(); // Avoided for performance: int c = is.get();
+        return c  == std::char_traits<char>::eof() ? c : ( c & 0xFF );
     }
 
     void unget_character() override
     {
-	is.unget();
+        is.rdbuf()->sungetc(); // Avoided for performance: is.unget();
     }
   private:
 
@@ -1489,10 +1492,10 @@ class input_buffer_adapter : public input_adapter_protocol
 
     void unget_character() noexcept override
     {
-	if (JSON_LIKELY(cursor > 0))
-	{
-	    --cursor;
-	}
+        if (JSON_LIKELY(cursor > start))
+        {
+            --cursor;
+        }
     }
 
   private:
@@ -2571,7 +2574,7 @@ scan_number_any2:
 scan_number_done:
         // unget the character after the number (we only read it to know that
         // we are done scanning a number)
-	unget();
+        unget();
 
         // terminate token
         add('\0');
@@ -2652,29 +2655,31 @@ scan_number_done:
     void reset() noexcept
     {
         yylen = 0;
-        start_pos = chars_read - 1;
-	token_string = static_cast<char>( current );
+        token_string.clear();
+        token_string.push_back(static_cast<char>(current));
     }
 
     /// get a character from the input
     int get()
     {
         ++chars_read;
-
-	int c = current = ia->get_character();
-	token_string += static_cast<char>( c );
-	return c;
+        int c = current = ia->get_character();
+        token_string.push_back(static_cast<char>(c));
+        return c;
     }
 
     /// unget current character (return it again on next get)
     void unget()
     {
         --chars_read;
-
-	if (token_string.size() > 0)
-	    token_string.resize( token_string.size() - 1 );
+        if (JSON_LIKELY(current != std::char_traits<char>::eof()))
+        {
+            ia->unget_character();
+        }
+        if (! token_string.empty())
+            token_string.pop_back();
     }
-	
+        
     /// add a character to yytext
     void add(int c)
     {
@@ -2736,10 +2741,12 @@ scan_number_done:
         std::string result;
         for (auto c : token_string)
         {
-	    if ( c == std::char_traits<char>::eof() ) {
-		continue;
-	    }
-	    else if ('\x00' <= c and c <= '\x1f')
+            if (c == '\0' or c == std::char_traits<char>::eof())
+            {
+                // ignore EOF
+                continue;
+            }
+            else if ('\x00' <= c and c <= '\x1f')
             {
                 // escape control characters
                 std::stringstream ss;
@@ -2840,10 +2847,8 @@ scan_number_done:
 
     /// the number of characters read
     std::size_t chars_read = 0;
-    /// the start position of the current token
-    std::size_t start_pos = 0;
     /// raw input token string (for error messages)
-    std::string token_string = "";
+    std::vector<char> token_string = std::vector<char>();
 
     /// buffer for variable-length tokens (numbers, strings)
     std::vector<char> yytext = std::vector<char>(1024, '\0');

From 7c523338c512eee9d0c039bb90edbe64bd3d139e Mon Sep 17 00:00:00 2001
From: Perry Kundert <perry@hardconsulting.com>
Date: Wed, 4 Oct 2017 08:33:35 -0700
Subject: [PATCH 05/17] Remove unnnecessary NUL termination of yytext (as it
 may contain NULs)

---
 src/json.hpp | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/src/json.hpp b/src/json.hpp
index 516f2bbb..1231354c 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -1828,9 +1828,6 @@ class lexer
                 // closing quote
                 case '\"':
                 {
-                    // terminate yytext
-                    add('\0');
-                    --yylen;
                     return token_type::value_string;
                 }
 
@@ -2576,10 +2573,6 @@ scan_number_done:
         // we are done scanning a number)
         unget();
 
-        // terminate token
-        add('\0');
-        --yylen;
-
         char* endptr = nullptr;
         errno = 0;
 

From 97a388802d9e4175f53a4322e0fa3180b43a1acf Mon Sep 17 00:00:00 2001
From: Perry Kundert <perry@hardconsulting.com>
Date: Wed, 4 Oct 2017 08:40:32 -0700
Subject: [PATCH 06/17] Improve performance by constructing yytext as a
 std::string o Return its contents when necessary.  In many cases, this avoids
   construction of multiple copies of the yytext token.  Exceeds   performance
 of current develop branch.

---
 src/json.hpp | 31 ++++++++++---------------------
 1 file changed, 10 insertions(+), 21 deletions(-)

diff --git a/src/json.hpp b/src/json.hpp
index 1231354c..48c5aa86 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -1795,9 +1795,9 @@ class lexer
     @brief scan a string literal
 
     This function scans a string according to Sect. 7 of RFC 7159. While
-    scanning, bytes are escaped and copied into buffer yytext. Then the
-    function returns successfully, yytext is null-terminated and yylen
-    contains the number of bytes in the string.
+    scanning, bytes are escaped and copied into buffer yytext. Then the function
+    returns successfully, yytext is *not* null-terminated (as it may contain \0
+    bytes), and yytext.size() is the number of bytes in the string.
 
     @return token_type::value_string if string could be successfully scanned,
             token_type::parse_error otherwise
@@ -2582,7 +2582,7 @@ scan_number_done:
             const auto x = std::strtoull(yytext.data(), &endptr, 10);
 
             // we checked the number format before
-            assert(endptr == yytext.data() + yylen);
+            assert(endptr == yytext.data() + yytext.size());
 
             if (errno == 0)
             {
@@ -2598,7 +2598,7 @@ scan_number_done:
             const auto x = std::strtoll(yytext.data(), &endptr, 10);
 
             // we checked the number format before
-            assert(endptr == yytext.data() + yylen);
+            assert(endptr == yytext.data() + yytext.size());
 
             if (errno == 0)
             {
@@ -2615,7 +2615,7 @@ scan_number_done:
         strtof(value_float, yytext.data(), &endptr);
 
         // we checked the number format before
-        assert(endptr == yytext.data() + yylen);
+        assert(endptr == yytext.data() + yytext.size());
 
         return token_type::value_float;
     }
@@ -2647,7 +2647,7 @@ scan_number_done:
     /// reset yytext; current character is beginning of token
     void reset() noexcept
     {
-        yylen = 0;
+        yytext.clear();
         token_string.clear();
         token_string.push_back(static_cast<char>(current));
     }
@@ -2676,14 +2676,7 @@ scan_number_done:
     /// add a character to yytext
     void add(int c)
     {
-        // resize yytext if necessary; this condition is deemed unlikely,
-        // because we start with a 1024-byte buffer
-        if (JSON_UNLIKELY((yylen + 1 > yytext.capacity())))
-        {
-            yytext.resize(2 * yytext.capacity(), '\0');
-        }
-        assert(yylen < yytext.size());
-        yytext[yylen++] = static_cast<char>(c);
+        yytext.push_back(static_cast<char>(c));
     }
 
   public:
@@ -2712,9 +2705,7 @@ scan_number_done:
     /// return string value
     const std::string get_string()
     {
-        // yytext cannot be returned as char*, because it may contain a null
-        // byte (parsed as "\u0000")
-        return std::string(yytext.data(), yylen);
+        return std::move( yytext );
     }
 
     /////////////////////
@@ -2844,9 +2835,7 @@ scan_number_done:
     std::vector<char> token_string = std::vector<char>();
 
     /// buffer for variable-length tokens (numbers, strings)
-    std::vector<char> yytext = std::vector<char>(1024, '\0');
-    /// current index in yytext
-    std::size_t yylen = 0;
+    std::string yytext = "";
 
     /// a description of occurred lexer errors
     const char* error_message = "";

From e0d890cc23045dfb7e2e1af160cc35437dfeff82 Mon Sep 17 00:00:00 2001
From: Perry Kundert <perry@hardconsulting.com>
Date: Wed, 4 Oct 2017 10:21:55 -0700
Subject: [PATCH 07/17] Corrected unnnecessary const restriction on returned
 std::string

---
 src/json.hpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/json.hpp b/src/json.hpp
index 48c5aa86..4f996457 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -2656,9 +2656,9 @@ scan_number_done:
     int get()
     {
         ++chars_read;
-        int c = current = ia->get_character();
-        token_string.push_back(static_cast<char>(c));
-        return c;
+        current = ia->get_character();
+        token_string.push_back(static_cast<char>(current));
+        return current;
     }
 
     /// unget current character (return it again on next get)
@@ -2703,7 +2703,7 @@ scan_number_done:
     }
 
     /// return string value
-    const std::string get_string()
+    std::string get_string()
     {
         return std::move( yytext );
     }

From 8665e25942a74a3503a76bd59952dd42fbe7ba61 Mon Sep 17 00:00:00 2001
From: Perry Kundert <perry@hardconsulting.com>
Date: Wed, 4 Oct 2017 10:47:52 -0700
Subject: [PATCH 08/17] Rename get_string to move_string to imply side-effect

---
 src/json.hpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/json.hpp b/src/json.hpp
index 4f996457..2d95f18d 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -2702,8 +2702,8 @@ scan_number_done:
         return value_float;
     }
 
-    /// return string value
-    std::string get_string()
+    /// return current string value (implicitly resets the token; useful only once)
+    std::string move_string()
     {
         return std::move( yytext );
     }
@@ -3004,7 +3004,7 @@ class parser
                     {
                         return;
                     }
-                    key = m_lexer.get_string();
+                    key = m_lexer.move_string();
 
                     bool keep_tag = false;
                     if (keep)
@@ -3142,7 +3142,7 @@ class parser
             case token_type::value_string:
             {
                 result.m_type = value_t::string;
-                result.m_value = m_lexer.get_string();
+                result.m_value = m_lexer.move_string();
                 break;
             }
 

From 546e148b24c8b58a6e3bed8aadb8f081f7059f98 Mon Sep 17 00:00:00 2001
From: Perry Kundert <perry@hardconsulting.com>
Date: Wed, 4 Oct 2017 11:31:10 -0700
Subject: [PATCH 09/17] Further performance improvements, and corrections in
 get_token_string o An (-'ve valued, typically -1) EOF must never be allowed
 in   token_string, as it be converted to 255 -- a legitimate value. o
 Comparing against a specific eof() (-1, typically) is more costly than  
 detecting +'ve/-'ve.  Since EOF is the only non-positive value allowed   we
 can use the simpler test. o Removed unnecessary test for token_string size,
 as it is already   tested in the method, and must never occur in correct
 code; used an   assert instead.

---
 src/json.hpp                   | 45 ++++++++++++++++++++--------------
 test/src/unit-class_parser.cpp |  2 +-
 2 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/src/json.hpp b/src/json.hpp
index 2d95f18d..ed5f1c1d 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -1397,8 +1397,8 @@ constexpr T static_const<T>::value;
 /// abstract input adapter interface
 struct input_adapter_protocol
 {
-    virtual int get_character() = 0;
-    virtual void unget_character() = 0;
+    virtual int get_character() = 0; // returns characters in range [0,255], or eof() (a -'ve value)
+    virtual void unget_character() = 0; // restore the last non-eof() character to input
     virtual ~input_adapter_protocol() = default;
 };
 
@@ -1449,7 +1449,7 @@ class input_stream_adapter : public input_adapter_protocol
     int get_character() override
     {
         int c = is.rdbuf()->sbumpc(); // Avoided for performance: int c = is.get();
-        return c  == std::char_traits<char>::eof() ? c : ( c & 0xFF );
+        return c < 0 ? c : ( c & 0xFF ); // faster than == std::char_traits<char>::eof()
     }
 
     void unget_character() override
@@ -2652,12 +2652,24 @@ scan_number_done:
         token_string.push_back(static_cast<char>(current));
     }
 
-    /// get a character from the input
+    /*
+    @brief get next character from the input
+
+    This function provides the interface to the used input adapter. It does
+    not throw in case the input reached EOF, but returns a -'ve valued
+    `std::char_traits<char>::eof()` in that case.  Stores the scanned characters
+    for use in error messages.
+
+    @return character read from the input
+    */
     int get()
     {
         ++chars_read;
         current = ia->get_character();
-        token_string.push_back(static_cast<char>(current));
+        if (JSON_LIKELY(current >= 0)) // faster than: != std::char_traits<char>::eof()))
+        {
+            token_string.push_back(static_cast<char>(current));
+        }
         return current;
     }
 
@@ -2665,12 +2677,12 @@ scan_number_done:
     void unget()
     {
         --chars_read;
-        if (JSON_LIKELY(current != std::char_traits<char>::eof()))
+        if (JSON_LIKELY(current >= 0)) // faster than: != std::char_traits<char>::eof()))
         {
             ia->unget_character();
-        }
-        if (! token_string.empty())
+            assert(token_string.size() != 0);
             token_string.pop_back();
+        }
     }
         
     /// add a character to yytext
@@ -2718,19 +2730,16 @@ scan_number_done:
         return chars_read;
     }
 
-    /// return the last read token (for errors only)
+    /// return the last read token (for errors only).  Will never contain EOF
+    /// (a -'ve value), because 255 may legitimately occur.  May contain NUL, which
+    /// should be escaped.
     std::string get_token_string() const
     {
         // escape control characters
         std::string result;
         for (auto c : token_string)
         {
-            if (c == '\0' or c == std::char_traits<char>::eof())
-            {
-                // ignore EOF
-                continue;
-            }
-            else if ('\x00' <= c and c <= '\x1f')
+            if ('\x00' <= c and c <= '\x1f')
             {
                 // escape control characters
                 std::stringstream ss;
@@ -5144,7 +5153,7 @@ class binary_reader
     @brief get next character from the input
 
     This function provides the interface to the used input adapter. It does
-    not throw in case the input reached EOF, but returns
+    not throw in case the input reached EOF, but returns a -'ve valued
     `std::char_traits<char>::eof()` in that case.
 
     @return character read from the input
@@ -5448,14 +5457,14 @@ class binary_reader
     {
         if (expect_eof)
         {
-            if (JSON_UNLIKELY(current != std::char_traits<char>::eof()))
+            if (JSON_UNLIKELY(current >= 0 )) // faster than: != std::char_traits<char>::eof()))
             {
                 JSON_THROW(parse_error::create(110, chars_read, "expected end of input"));
             }
         }
         else
         {
-            if (JSON_UNLIKELY(current == std::char_traits<char>::eof()))
+            if (JSON_UNLIKELY(current < 0)) // faster than: == std::char_traits<char>::eof()))
             {
                 JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input"));
             }
diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp
index 08d4d6ef..9afa7d26 100644
--- a/test/src/unit-class_parser.cpp
+++ b/test/src/unit-class_parser.cpp
@@ -215,7 +215,7 @@ TEST_CASE("parser class")
                     std::string s = "\"1\"";
                     s[1] = '\0';
                     CHECK_THROWS_AS(json::parse(s.begin(), s.end()), json::parse_error&);
-                    CHECK_THROWS_WITH(json::parse(s.begin(), s.end()), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character must be escaped; last read: '\"'");
+                    CHECK_THROWS_WITH(json::parse(s.begin(), s.end()), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character must be escaped; last read: '\"<U+0000>'");
                 }
             }
 

From f775922ca8d94c2cdf602ab9ffba546f60b79a49 Mon Sep 17 00:00:00 2001
From: Perry Kundert <perry@hardconsulting.com>
Date: Wed, 4 Oct 2017 15:01:10 -0700
Subject: [PATCH 10/17] Specify initializers for yytest, token_string using
 initializer-lists o We can retain -Weffc++ and specify default initializers
 by using   initializer lists.  The risks are low (of additional non-conformat
   compilers), because there is already one other such initialization   used
 in the code-base.

---
 src/json.hpp | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/json.hpp b/src/json.hpp
index ed5f1c1d..bf07d1c8 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -2833,29 +2833,30 @@ scan_number_done:
 
   private:
     /// input adapter
-    detail::input_adapter_t ia = nullptr;
+    detail::input_adapter_t ia { nullptr };
 
     /// the current character
-    int current = std::char_traits<char>::eof();
+    int current { std::char_traits<char>::eof() };
 
     /// the number of characters read
-    std::size_t chars_read = 0;
+    std::size_t chars_read { 0 };
+
     /// raw input token string (for error messages)
-    std::vector<char> token_string = std::vector<char>();
+    std::vector<char> token_string { };
 
     /// buffer for variable-length tokens (numbers, strings)
-    std::string yytext = "";
+    std::string yytext { };
 
     /// a description of occurred lexer errors
-    const char* error_message = "";
+    const char* error_message { "" };
 
     // number values
-    number_integer_t value_integer = 0;
-    number_unsigned_t value_unsigned = 0;
-    number_float_t value_float = 0;
+    number_integer_t value_integer { 0 };
+    number_unsigned_t value_unsigned { 0 };
+    number_float_t value_float { 0 };
 
     /// the decimal point
-    const char decimal_point_char = '.';
+    const char decimal_point_char { '.' };
 };
 
 /*!

From 184dab60e6440092674d92e855cdcd2d51e175e0 Mon Sep 17 00:00:00 2001
From: Perry Kundert <perry@hardconsulting.com>
Date: Thu, 5 Oct 2017 14:13:55 -0700
Subject: [PATCH 11/17] Accelerate access to underlying std::istream streambuf

---
 src/json.hpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/json.hpp b/src/json.hpp
index bf07d1c8..8f28051a 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -1417,6 +1417,7 @@ class input_stream_adapter : public input_adapter_protocol
     }
     explicit input_stream_adapter(std::istream& i)
         : is(i)
+        , sb(i.rdbuf())
     {
         // Ignore Byte Order Mark at start of input
         int c;
@@ -1448,18 +1449,19 @@ class input_stream_adapter : public input_adapter_protocol
 
     int get_character() override
     {
-        int c = is.rdbuf()->sbumpc(); // Avoided for performance: int c = is.get();
+        int c = sb->sbumpc(); // Avoided for performance: int c = is.get();
         return c < 0 ? c : ( c & 0xFF ); // faster than == std::char_traits<char>::eof()
     }
 
     void unget_character() override
     {
-        is.rdbuf()->sungetc(); // Avoided for performance: is.unget();
+        sb->sungetc(); // Avoided for performance: is.unget();
     }
   private:
 
     /// the associated input stream
     std::istream& is;
+    std::streambuf *sb;
 };
 
 /// input adapter for buffer input

From 1b43a45bec8057fba315c3eccd91d567cd32a273 Mon Sep 17 00:00:00 2001
From: Perry Kundert <perry@hardconsulting.com>
Date: Thu, 5 Oct 2017 15:37:03 -0700
Subject: [PATCH 12/17] Implement correct handling of std::streambuf int_type,
 eof() o Make no assumptions about eof(), other than that it is somewhere  
 outside of the valid range of char_type.

---
 src/json.hpp | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/json.hpp b/src/json.hpp
index 8f28051a..5fc467da 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -1397,7 +1397,7 @@ constexpr T static_const<T>::value;
 /// abstract input adapter interface
 struct input_adapter_protocol
 {
-    virtual int get_character() = 0; // returns characters in range [0,255], or eof() (a -'ve value)
+    virtual int get_character() = 0; // returns characters in range [0,255], or eof()
     virtual void unget_character() = 0; // restore the last non-eof() character to input
     virtual ~input_adapter_protocol() = default;
 };
@@ -1447,10 +1447,13 @@ class input_stream_adapter : public input_adapter_protocol
         }
     }
 
+    // delete because of pointer members
+    input_stream_adapter(const input_stream_adapter&) = delete;
+    input_stream_adapter& operator=(input_stream_adapter&) = delete;
+
     int get_character() override
     {
-        int c = sb->sbumpc(); // Avoided for performance: int c = is.get();
-        return c < 0 ? c : ( c & 0xFF ); // faster than == std::char_traits<char>::eof()
+        return reinterpret_cast<int>( sb->sbumpc() );
     }
 
     void unget_character() override
@@ -1486,10 +1489,10 @@ class input_buffer_adapter : public input_adapter_protocol
     {
         if (JSON_LIKELY(cursor < limit))
         {
-            return *(cursor++) & 0xFF;
+            return reinterpret_cast<int>(std::char_traits<char>::to_int_type(*(cursor++)));
         }
 
-        return std::char_traits<char>::eof();
+        return reinterpret_cast<int>(std::char_traits<char>::eof());
     }
 
     void unget_character() noexcept override
@@ -2668,9 +2671,9 @@ scan_number_done:
     {
         ++chars_read;
         current = ia->get_character();
-        if (JSON_LIKELY(current >= 0)) // faster than: != std::char_traits<char>::eof()))
+        if (JSON_LIKELY( current != std::char_traits<char>::eof()))
         {
-            token_string.push_back(static_cast<char>(current));
+            token_string.push_back(std::char_traits<char>::to_char_type(current));
         }
         return current;
     }
@@ -2679,7 +2682,7 @@ scan_number_done:
     void unget()
     {
         --chars_read;
-        if (JSON_LIKELY(current >= 0)) // faster than: != std::char_traits<char>::eof()))
+        if (JSON_LIKELY(current != std::char_traits<char>::eof()))
         {
             ia->unget_character();
             assert(token_string.size() != 0);
@@ -2690,7 +2693,7 @@ scan_number_done:
     /// add a character to yytext
     void add(int c)
     {
-        yytext.push_back(static_cast<char>(c));
+        yytext.push_back(std::char_traits<char>::to_char_type(c));
     }
 
   public:
@@ -5460,14 +5463,14 @@ class binary_reader
     {
         if (expect_eof)
         {
-            if (JSON_UNLIKELY(current >= 0 )) // faster than: != std::char_traits<char>::eof()))
+            if (JSON_UNLIKELY(current != std::char_traits<char>::eof()))
             {
                 JSON_THROW(parse_error::create(110, chars_read, "expected end of input"));
             }
         }
         else
         {
-            if (JSON_UNLIKELY(current < 0)) // faster than: == std::char_traits<char>::eof()))
+            if (JSON_UNLIKELY(current == std::char_traits<char>::eof()))
             {
                 JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input"));
             }

From 5e480b56d843f3dd6869c2f7bbaeec1788076ac5 Mon Sep 17 00:00:00 2001
From: Perry Kundert <perry@hardconsulting.com>
Date: Fri, 6 Oct 2017 07:37:49 -0700
Subject: [PATCH 13/17] Further simplify character type handling

---
 src/json.hpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/json.hpp b/src/json.hpp
index 5fc467da..e3231f7d 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -1451,9 +1451,12 @@ class input_stream_adapter : public input_adapter_protocol
     input_stream_adapter(const input_stream_adapter&) = delete;
     input_stream_adapter& operator=(input_stream_adapter&) = delete;
 
+    // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
+    // ensure that std::char_traits<char>::eof() and the character 0xff do not
+    // end up as the same value, eg. 0xffffffff.
     int get_character() override
     {
-        return reinterpret_cast<int>( sb->sbumpc() );
+        return sb->sbumpc();
     }
 
     void unget_character() override
@@ -1489,10 +1492,10 @@ class input_buffer_adapter : public input_adapter_protocol
     {
         if (JSON_LIKELY(cursor < limit))
         {
-            return reinterpret_cast<int>(std::char_traits<char>::to_int_type(*(cursor++)));
+            return std::char_traits<char>::to_int_type(*(cursor++));
         }
 
-        return reinterpret_cast<int>(std::char_traits<char>::eof());
+        return std::char_traits<char>::eof();
     }
 
     void unget_character() noexcept override

From 45e1e3d48ad29f1301c9ecc6966c1e240e6b0168 Mon Sep 17 00:00:00 2001
From: Perry Kundert <perry@hardconsulting.com>
Date: Fri, 6 Oct 2017 07:53:31 -0700
Subject: [PATCH 14/17] Revert some unnecessary member initializer changes.

---
 src/json.hpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/json.hpp b/src/json.hpp
index e3231f7d..50d23d95 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -2841,13 +2841,13 @@ scan_number_done:
 
   private:
     /// input adapter
-    detail::input_adapter_t ia { nullptr };
+    detail::input_adapter_t ia = nullptr;
 
     /// the current character
-    int current { std::char_traits<char>::eof() };
+    int current = std::char_traits<char>::eof();
 
     /// the number of characters read
-    std::size_t chars_read { 0 };
+    std::size_t chars_read = 0;
 
     /// raw input token string (for error messages)
     std::vector<char> token_string { };
@@ -2856,15 +2856,15 @@ scan_number_done:
     std::string yytext { };
 
     /// a description of occurred lexer errors
-    const char* error_message { "" };
+    const char* error_message = "";
 
     // number values
-    number_integer_t value_integer { 0 };
-    number_unsigned_t value_unsigned { 0 };
-    number_float_t value_float { 0 };
+    number_integer_t value_integer = 0;
+    number_unsigned_t value_unsigned = 0;
+    number_float_t value_float = 0;
 
     /// the decimal point
-    const char decimal_point_char { '.' };
+    const char decimal_point_char = '.';
 };
 
 /*!

From 23440eb86ea2b5bcc4321cf39c4072296f04590b Mon Sep 17 00:00:00 2001
From: Perry Kundert <perry@hardconsulting.com>
Date: Fri, 6 Oct 2017 12:27:53 -0700
Subject: [PATCH 15/17] Remove outdated commentary about the value of eof(),
 retain input type o We assume the same character int_type as the unerlying
 std::istream o There are no assumptions on the value of eof(), other than
 that it   will not be a valid unsigned char value. o To retain performance,
 we do not allow swapping out the underlying   std::streambuf during our use
 of the std::istream for parsing.

---
 src/json.hpp | 49 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/src/json.hpp b/src/json.hpp
index 50d23d95..750ae51a 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -1394,10 +1394,22 @@ constexpr T static_const<T>::value;
 // input adapters //
 ////////////////////
 
-/// abstract input adapter interface
+/*!
+@brief abstract input adapter interface
+
+Produces a stream of std::char_traits<char>::int_type characters from a
+std::istream, a buffer, or some other input type.  Accepts the return of exactly
+one non-EOF character for future input.  The int_type characters returned
+consist of all valid char values as positive values (typically unsigned char),
+plus an EOF value outside that range, specified by the value of the function
+std::char_traits<char>::eof().  This value is typically -1, but could be any
+arbitrary value which is not a valid char value.
+
+@return Typically [0,255] plus std::char_traits<char>::eof().
+*/
 struct input_adapter_protocol
 {
-    virtual int get_character() = 0; // returns characters in range [0,255], or eof()
+    virtual std::char_traits<char>::int_type get_character() = 0;
     virtual void unget_character() = 0; // restore the last non-eof() character to input
     virtual ~input_adapter_protocol() = default;
 };
@@ -1405,8 +1417,13 @@ struct input_adapter_protocol
 /// a type to simplify interfaces
 using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
 
-
-/// input adapter for a (caching) istream.  Ignores a UFT Byte Order Mark at beginning of input.
+/// input adapter for a (caching) istream.  Ignores a UFT Byte Order Mark at
+/// beginning of input.  Does not support changing the underlying std::streambuf
+/// in mid-input.  Maintains underlying std::istream and std::streambuf to
+/// support subsequent use of standard std::istream operations to process any
+/// input characters following those used in parsing the JSON input.  Clears the
+/// std::istream flags; any input errors (eg. EOF) will be detected by the first
+/// subsequent call for input from the std::istream.
 class input_stream_adapter : public input_adapter_protocol
 {
   public:
@@ -1417,10 +1434,10 @@ class input_stream_adapter : public input_adapter_protocol
     }
     explicit input_stream_adapter(std::istream& i)
         : is(i)
-        , sb(i.rdbuf())
+        , sb(*i.rdbuf())
     {
         // Ignore Byte Order Mark at start of input
-        int c;
+        std::char_traits<char>::int_type c;
         if (( c = get_character() ) == 0xEF )
         {
             if (( c = get_character() ) == 0xBB )
@@ -1454,20 +1471,20 @@ class input_stream_adapter : public input_adapter_protocol
     // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
     // ensure that std::char_traits<char>::eof() and the character 0xff do not
     // end up as the same value, eg. 0xffffffff.
-    int get_character() override
+    std::char_traits<char>::int_type get_character() override
     {
-        return sb->sbumpc();
+        return sb.sbumpc();
     }
 
     void unget_character() override
     {
-        sb->sungetc(); // Avoided for performance: is.unget();
+        sb.sungetc(); // Avoided for performance: is.unget();
     }
   private:
 
     /// the associated input stream
     std::istream& is;
-    std::streambuf *sb;
+    std::streambuf &sb;
 };
 
 /// input adapter for buffer input
@@ -1488,7 +1505,7 @@ class input_buffer_adapter : public input_adapter_protocol
     input_buffer_adapter(const input_buffer_adapter&) = delete;
     input_buffer_adapter& operator=(input_buffer_adapter&) = delete;
 
-    int get_character() noexcept override
+    std::char_traits<char>::int_type get_character() noexcept override
     {
         if (JSON_LIKELY(cursor < limit))
         {
@@ -2664,13 +2681,13 @@ scan_number_done:
     @brief get next character from the input
 
     This function provides the interface to the used input adapter. It does
-    not throw in case the input reached EOF, but returns a -'ve valued
+    not throw in case the input reached EOF, but returns a
     `std::char_traits<char>::eof()` in that case.  Stores the scanned characters
     for use in error messages.
 
     @return character read from the input
     */
-    int get()
+    std::char_traits<char>::int_type get()
     {
         ++chars_read;
         current = ia->get_character();
@@ -2739,8 +2756,8 @@ scan_number_done:
     }
 
     /// return the last read token (for errors only).  Will never contain EOF
-    /// (a -'ve value), because 255 may legitimately occur.  May contain NUL, which
-    /// should be escaped.
+    /// (an arbitrary value that is not a valid char value, often -1), because
+    /// 255 may legitimately occur.  May contain NUL, which should be escaped.
     std::string get_token_string() const
     {
         // escape control characters
@@ -2844,7 +2861,7 @@ scan_number_done:
     detail::input_adapter_t ia = nullptr;
 
     /// the current character
-    int current = std::char_traits<char>::eof();
+    std::char_traits<char>::int_type current = std::char_traits<char>::eof();
 
     /// the number of characters read
     std::size_t chars_read = 0;

From 0b803d0a5ff48687558c85e7ceb5c2cf70ea29af Mon Sep 17 00:00:00 2001
From: Perry Kundert <perry@hardconsulting.com>
Date: Sat, 7 Oct 2017 15:50:19 -0700
Subject: [PATCH 16/17] Simplify the json/src/benchmarks.cpp to allow more
 optimal code gen. o For some unknown reason, the complexity of the benchmark
 platform   prevented some C++ compilers from generating optimal code,
 properly   reflective of the real performance in actual deployment. o Added
 the json_benchmarks_simple target, which performs the same   suite of tests
 as json_benchmarks. o Simplified the benchmark platform, and emit an
 "Average" TPS   (Transactions Per Second) value reflective of aggregate
 parse/output   performance.

---
 .gitignore                           |   1 +
 benchmarks/Makefile                  |  18 ++-
 benchmarks/src/benchmarks.cpp        |  20 +++-
 benchmarks/src/benchmarks_simple.cpp | 158 +++++++++++++++++++++++++++
 4 files changed, 190 insertions(+), 7 deletions(-)
 create mode 100644 benchmarks/src/benchmarks_simple.cpp

diff --git a/.gitignore b/.gitignore
index 5b2bc0fa..8157f1a9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 json_unit
 json_benchmarks
+json_benchmarks_simple
 fuzz-testing
 
 *.dSYM
diff --git a/benchmarks/Makefile b/benchmarks/Makefile
index 0e4068c4..ef2de8a3 100644
--- a/benchmarks/Makefile
+++ b/benchmarks/Makefile
@@ -1,11 +1,21 @@
-all: json_benchmarks
-	./json_benchmarks
 
-json_benchmarks: src/benchmarks.cpp ../src/json.hpp number_jsons
+#
+# Build/run json.hpp benchmarks, eg. CXX=g++-7 make
+# 
+# The existing json_benchmarks did not allow optimization under some compilers
+# 
+all: json_benchmarks json_benchmarks_simple number_jsons
+	bash -c 'time ./json_benchmarks'
+	bash -c 'time ./json_benchmarks_simple'
+
+json_benchmarks: src/benchmarks.cpp ../src/json.hpp
 	$(CXX) -std=c++11 -pthread $(CXXFLAGS) -DNDEBUG -O3 -flto -I thirdparty/benchpress -I thirdparty/cxxopts -I../src src/benchmarks.cpp $(LDFLAGS) -o $@
 
+json_benchmarks_simple: src/benchmarks_simple.cpp ../src/json.hpp
+	$(CXX) -std=c++11 $(CXXFLAGS) -DNDEBUG -O3 -flto -I../src $(<) $(LDFLAGS) -o $@
+
 number_jsons:
 	(test -e files/numbers/floats.json -a -e files/numbers/signed_ints.json -a -e files/numbers/unsigned_ints.json) || (cd files/numbers ; python generate.py)
 
 clean:
-	rm -f json_benchmarks files/numbers/*.json
+	rm -f json_benchmarks json_benchmarks_simple files/numbers/*.json
diff --git a/benchmarks/src/benchmarks.cpp b/benchmarks/src/benchmarks.cpp
index 55a4e478..a76c3783 100644
--- a/benchmarks/src/benchmarks.cpp
+++ b/benchmarks/src/benchmarks.cpp
@@ -34,6 +34,19 @@ static void bench(benchpress::context& ctx,
 {
     // using string streams for benchmarking to factor-out cold-cache disk
     // access.
+#if defined( FROMFILE )
+    std::ifstream istr;
+    {
+        istr.open( in_path, std::ifstream::in );
+
+        // read the stream once
+        json j;
+        istr >> j;
+        // clear flags and rewind
+        istr.clear();
+        istr.seekg(0);
+    }
+#else
     std::stringstream istr;
     {
         // read file into string stream
@@ -43,11 +56,12 @@ static void bench(benchpress::context& ctx,
 
         // read the stream once
         json j;
-        j << istr;
+        istr >> j;
         // clear flags and rewind
         istr.clear();
         istr.seekg(0);
     }
+#endif
 
     switch (mode)
     {
@@ -62,7 +76,7 @@ static void bench(benchpress::context& ctx,
                 istr.clear();
                 istr.seekg(0);
                 json j;
-                j << istr;
+                istr >> j;
             }
 
             break;
@@ -74,7 +88,7 @@ static void bench(benchpress::context& ctx,
         {
             // create JSON value from input
             json j;
-            j << istr;
+            istr >> j;
             std::stringstream ostr;
 
             ctx.reset_timer();
diff --git a/benchmarks/src/benchmarks_simple.cpp b/benchmarks/src/benchmarks_simple.cpp
new file mode 100644
index 00000000..4fad680a
--- /dev/null
+++ b/benchmarks/src/benchmarks_simple.cpp
@@ -0,0 +1,158 @@
+// 
+// benchmarks_simple.cpp -- a less complex version of benchmarks.cpp, that better reflects actual performance
+// 
+//     For some reason, the complexity of benchmarks.cpp doesn't allow
+// the compiler to optimize code using json.hpp effectively.  The
+// exact same tests, with the use of benchpress and cxxopts produces
+// much faster code, at least under g++.
+// 
+#include <fstream>
+#include <iostream>
+#include <chrono>
+#include <list>
+#include <tuple>
+
+#include <json.hpp>
+
+using json = nlohmann::json;
+
+enum class EMode { input, output, indent };
+
+static double bench(const EMode mode, size_t iters, const std::string& in_path )
+{
+    // using string streams for benchmarking to factor-out cold-cache disk
+    // access.  Define FROMFILE to use file I/O instead.
+#if defined( FROMFILE )
+    std::ifstream istr;
+    {
+        istr.open( in_path, std::ifstream::in );
+
+        // read the stream once
+        json j;
+        istr >> j;
+        // clear flags and rewind
+        istr.clear();
+        istr.seekg(0);
+    }
+#else
+    std::stringstream istr;
+    {
+        // read file into string stream
+        std::ifstream input_file(in_path);
+        istr << input_file.rdbuf();
+        input_file.close();
+
+        // read the stream once
+        json j;
+        istr >> j;
+        // clear flags and rewind
+        istr.clear();
+        istr.seekg(0);
+    }
+#endif
+    double tps = 0;
+    switch (mode)
+    {
+        // benchmarking input
+        case EMode::input:
+        {
+	    auto start = std::chrono::system_clock::now();
+            for (size_t i = 0; i < iters; ++i)
+            {
+                // clear flags and rewind
+                istr.clear();
+                istr.seekg(0);
+                json j;
+                istr >> j;
+            }
+	    auto ended = std::chrono::system_clock::now();
+	    tps = 1.0 / std::chrono::duration<double>( ended - start ).count();
+            break;
+        }
+
+        // benchmarking output
+        case EMode::output:
+        case EMode::indent:
+        {
+            // create JSON value from input
+            json j;
+            istr >> j;
+            std::stringstream ostr;
+
+	    auto start = std::chrono::system_clock::now();
+            for (size_t i = 0; i < iters; ++i)
+            {
+                if (mode == EMode::indent)
+                {
+                    ostr << j;
+                }
+                else
+                {
+                    ostr << std::setw(4) << j;
+                }
+
+                // reset data
+                ostr.str(std::string());
+            }
+	    auto ended = std::chrono::system_clock::now();
+	    tps = 1.0 / std::chrono::duration<double>( ended - start ).count();
+
+            break;
+        }
+    }
+    return tps;
+}
+
+template <typename T>
+struct average {
+    T _sum { 0 };
+    size_t _count { 0 };
+    T operator+=( const T &val_ ) { _sum += val_; +_count++; return val_; }
+    operator T() { return _sum / _count; }
+};
+
+// Execute each test approximately enough times to get near 1
+// transaction per second, and compute the average; a single aggregate
+// number that gives a performance metric representing both parsing
+// and output.
+
+int main( int, char ** )
+{
+    std::list<std::tuple<std::string, EMode, size_t, std::string>> tests {
+	{ "parse jeopardy.json",	EMode::input,   2, "files/jeopardy/jeopardy.json" },
+	{ "parse canada.json",		EMode::input,  30, "files/nativejson-benchmark/canada.json" },
+	{ "parse citm_catalog.json",	EMode::input, 120, "files/nativejson-benchmark/citm_catalog.json" },
+	{ "parse twitter.json",		EMode::input, 225, "files/nativejson-benchmark/twitter.json" },
+	{ "parse floats.json",		EMode::input,   5, "files/numbers/floats.json" },
+	{ "parse signed_ints.json",	EMode::input,   6, "files/numbers/signed_ints.json" },
+	{ "parse unsigned_ints.json",	EMode::input,   6, "files/numbers/unsigned_ints.json" },
+	{ "dump jeopardy.json",		EMode::output,  5, "files/jeopardy/jeopardy.json" },
+	{ "dump jeopardy.json w/ind.",	EMode::indent,  5, "files/jeopardy/jeopardy.json" },
+	{ "dump floats.json",		EMode::output,  2, "files/numbers/floats.json" },
+	{ "dump signed_ints.json",	EMode::output, 20, "files/numbers/signed_ints.json" },
+    };
+    
+    average<double> avg;
+    for ( auto t : tests ) {
+	std::string name, path;
+	EMode mode;
+	size_t iters;
+	std::tie(name, mode, iters, path) = t;
+	auto tps = bench( mode, iters, path );
+	avg += tps;
+	std::cout
+	    << std::left 
+	    << std::setw( 30 ) << name
+	    << std::right	    
+	    << " x " 	<< std::setw(  3 ) << iters
+	    << std::left
+	    << " == " 	<< std::setw( 10 ) << tps
+	    << std::right
+	    << " TPS, "	<< std::setw(  8 ) << std::round( tps * 1e6 / iters )
+	    << " ms/op"
+	    << std::endl;
+    }
+    std::cout << std::setw( 40 ) << "" << std::string( 10, '-' ) << std::endl;
+    std::cout << std::setw( 40 ) << "" << std::setw( 10 ) << std::left << avg << " TPS Average" << std::endl;
+    return 0;
+}

From a8cc7a1bc8e8addda9504e8510ff95a68d86e4b3 Mon Sep 17 00:00:00 2001
From: Perry Kundert <perry@hardconsulting.com>
Date: Mon, 16 Oct 2017 08:06:10 -0700
Subject: [PATCH 17/17] Consistently use std::char_traits int_type-->char
 conversion intrinsics

---
 src/json.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/json.hpp b/src/json.hpp
index 750ae51a..1fa90456 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -2674,7 +2674,7 @@ scan_number_done:
     {
         yytext.clear();
         token_string.clear();
-        token_string.push_back(static_cast<char>(current));
+        token_string.push_back(std::char_traits<char>::to_char_type(current));
     }
 
     /*