From 754c38e871497013e22de894e696122cb7991d93 Mon Sep 17 00:00:00 2001 From: Aaron Burghardt Date: Fri, 27 Feb 2015 20:19:15 -0500 Subject: [PATCH 01/11] Moved buffer management into the lexer class and implemented YYFILL so that streams are read incrementally. --- src/json.hpp.re2c | 66 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index 9226dc13..e6b8a5ab 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -3046,11 +3046,20 @@ class basic_json using lexer_char_t = unsigned char; /// constructor with a given buffer - inline lexer(const string_t& s) noexcept - : m_content(reinterpret_cast(s.c_str())) + inline lexer(const string_t& s) noexcept + : m_buffer(s), m_stream(nullptr) + { + m_content = reinterpret_cast(s.c_str()); + m_start = m_cursor = m_content; + m_limit = m_content + s.size(); + } + inline lexer(std::istream* s) noexcept + : m_stream(s) { + getline(*m_stream, m_buffer); + m_content = reinterpret_cast(m_buffer.c_str()); m_start = m_cursor = m_content; - m_limit = m_content + s.size(); + m_limit = m_content + m_buffer.size(); } /// default constructor @@ -3182,6 +3191,8 @@ class basic_json // remember the begin of the token m_start = m_cursor; +#define YYFILL(n) { size_t offset_marker = m_marker - m_start; yyfill(n); m_marker = m_start + offset_marker; } + /*!re2c re2c:define:YYCTYPE = lexer_char_t; re2c:define:YYCURSOR = m_cursor; @@ -3190,7 +3201,6 @@ class basic_json re2c:indent:string = " "; re2c:indent:top = 1; re2c:labelprefix = "basic_json_parser_"; - re2c:yyfill:enable = 0; // whitespace ws = [ \t\n\r]+; @@ -3240,8 +3250,28 @@ class basic_json // anything else is an error . { return token_type::parse_error; } */ + } + void yyfill(int n) noexcept + { + if (not m_stream or not *m_stream) return; + + ssize_t offset_start = m_start - m_content; + ssize_t offset_cursor = m_cursor - m_start; + ssize_t offset_limit = m_limit - m_start; + + m_buffer.erase(0, offset_start); + std::string line; + std::getline(*m_stream, line); + m_buffer += line; + + m_content = reinterpret_cast(m_buffer.c_str()); //reinterpret_cast(endptr) + m_start = m_content + offset_start; + m_cursor = m_start + offset_cursor; + m_limit = m_start + offset_limit; + } + /// return string representation of last read token inline string_t get_token() const noexcept { @@ -3404,14 +3434,20 @@ class basic_json } private: + /// optional input stream + std::istream* m_stream; /// the buffer + string_t m_buffer; + /// the buffer pointer const lexer_char_t* m_content = nullptr; - /// pointer to he beginning of the current symbol + /// pointer to the beginning of the current symbol const lexer_char_t* m_start = nullptr; /// pointer to the current symbol const lexer_char_t* m_cursor = nullptr; /// pointer to the end of the buffer const lexer_char_t* m_limit = nullptr; + /// YYSTATE + int m_state; }; /*! @@ -3421,24 +3457,24 @@ class basic_json { public: /// constructor for strings - inline parser(const string_t& s) : m_buffer(s), m_lexer(m_buffer) + inline parser(const string_t& s) : m_lexer(s) { // read first token get_token(); } /// a parser reading from an input stream - inline parser(std::istream& _is) + inline parser(std::istream& _is) : m_lexer(&_is) { - while (_is) - { - string_t input_line; - std::getline(_is, input_line); - m_buffer += input_line; - } +// while (_is) +// { +// string_t input_line; +// std::getline(_is, input_line); +// m_buffer += input_line; +// } // initializer lexer - m_lexer = lexer(m_buffer); +// m_lexer = std::move(lexer(_is)); // read first token get_token(); @@ -3625,8 +3661,6 @@ class basic_json } private: - /// the buffer - string_t m_buffer; /// the type of the last read token typename lexer::token_type last_token = lexer::token_type::uninitialized; /// the lexer From e4cc42cf818cb738a74c591a1e5a12dad9b107d1 Mon Sep 17 00:00:00 2001 From: Aaron Burghardt Date: Fri, 27 Feb 2015 20:27:37 -0500 Subject: [PATCH 02/11] Moved m_marker in lexer::scan() to be a member of lexer. --- src/json.hpp.re2c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index e6b8a5ab..7174c380 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -3186,7 +3186,7 @@ class basic_json inline token_type scan() noexcept { // pointer for backtracking information - const lexer_char_t* m_marker = nullptr; + m_marker = nullptr; // remember the begin of the token m_start = m_cursor; @@ -3258,6 +3258,7 @@ class basic_json if (not m_stream or not *m_stream) return; ssize_t offset_start = m_start - m_content; + ssize_t offset_marker = m_marker - m_start; ssize_t offset_cursor = m_cursor - m_start; ssize_t offset_limit = m_limit - m_start; @@ -3268,6 +3269,7 @@ class basic_json m_content = reinterpret_cast(m_buffer.c_str()); //reinterpret_cast(endptr) m_start = m_content + offset_start; + m_marker = m_start + offset_marker; m_cursor = m_start + offset_cursor; m_limit = m_start + offset_limit; } @@ -3442,6 +3444,8 @@ class basic_json const lexer_char_t* m_content = nullptr; /// pointer to the beginning of the current symbol const lexer_char_t* m_start = nullptr; + /// pointer for backtracking information + const lexer_char_t* m_marker = nullptr; /// pointer to the current symbol const lexer_char_t* m_cursor = nullptr; /// pointer to the end of the buffer From e3e18d7b8501965779a042de1a96f075ccfa5739 Mon Sep 17 00:00:00 2001 From: Aaron Burghardt Date: Fri, 27 Feb 2015 20:31:03 -0500 Subject: [PATCH 03/11] Deleted extraneous comment. --- src/json.hpp.re2c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index 7174c380..777fdbbe 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -3267,7 +3267,7 @@ class basic_json std::getline(*m_stream, line); m_buffer += line; - m_content = reinterpret_cast(m_buffer.c_str()); //reinterpret_cast(endptr) + m_content = reinterpret_cast(m_buffer.c_str()); m_start = m_content + offset_start; m_marker = m_start + offset_marker; m_cursor = m_start + offset_cursor; From 0d79e7c2a27378e149fc3e5747b3a8f22065dc20 Mon Sep 17 00:00:00 2001 From: Aaron Burghardt Date: Fri, 27 Feb 2015 20:38:05 -0500 Subject: [PATCH 04/11] Removed duplicate m_marker updates in YYFILL macro. --- src/json.hpp.re2c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index 777fdbbe..43122080 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -3191,7 +3191,7 @@ class basic_json // remember the begin of the token m_start = m_cursor; -#define YYFILL(n) { size_t offset_marker = m_marker - m_start; yyfill(n); m_marker = m_start + offset_marker; } +#define YYFILL(n) { yyfill(n); } /*!re2c re2c:define:YYCTYPE = lexer_char_t; From b66c306d475ef36d9c51cf32830dcff3d2c00a5d Mon Sep 17 00:00:00 2001 From: Aaron Burghardt Date: Sat, 28 Feb 2015 07:13:17 -0500 Subject: [PATCH 05/11] Removed unused member m_state. --- src/json.hpp.re2c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index 43122080..e8b2e019 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -3450,8 +3450,6 @@ class basic_json const lexer_char_t* m_cursor = nullptr; /// pointer to the end of the buffer const lexer_char_t* m_limit = nullptr; - /// YYSTATE - int m_state; }; /*! From ec6979bf76220d8d962e58690ec2487de3c3fd11 Mon Sep 17 00:00:00 2001 From: Aaron Burghardt Date: Sat, 28 Feb 2015 08:32:12 -0500 Subject: [PATCH 06/11] Purged old commented-out code. --- src/json.hpp.re2c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index e8b2e019..1a732ef3 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -3468,16 +3468,6 @@ class basic_json /// a parser reading from an input stream inline parser(std::istream& _is) : m_lexer(&_is) { -// while (_is) -// { -// string_t input_line; -// std::getline(_is, input_line); -// m_buffer += input_line; -// } - - // initializer lexer -// m_lexer = std::move(lexer(_is)); - // read first token get_token(); } From edb697293b016f7f169c215480eb459779887612 Mon Sep 17 00:00:00 2001 From: Aaron Burghardt Date: Sat, 28 Feb 2015 08:42:20 -0500 Subject: [PATCH 07/11] Fixed variable adjustments in yyfill(). --- src/json.hpp.re2c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index 1a732ef3..5ed8608c 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -3260,7 +3260,6 @@ class basic_json ssize_t offset_start = m_start - m_content; ssize_t offset_marker = m_marker - m_start; ssize_t offset_cursor = m_cursor - m_start; - ssize_t offset_limit = m_limit - m_start; m_buffer.erase(0, offset_start); std::string line; @@ -3268,10 +3267,10 @@ class basic_json m_buffer += line; m_content = reinterpret_cast(m_buffer.c_str()); - m_start = m_content + offset_start; + m_start = m_content; m_marker = m_start + offset_marker; m_cursor = m_start + offset_cursor; - m_limit = m_start + offset_limit; + m_limit = m_start + m_buffer.size() - 1; } /// return string representation of last read token From 268fd444e65e57149d2399e4d978d0102feb6dfe Mon Sep 17 00:00:00 2001 From: Aaron Burghardt Date: Sat, 28 Feb 2015 22:14:57 -0500 Subject: [PATCH 08/11] Added comments to new method yyfill. --- src/json.hpp.re2c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index 5ed8608c..7aad9680 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -3253,6 +3253,7 @@ class basic_json } + /// append data from the stream to the internal buffer void yyfill(int n) noexcept { if (not m_stream or not *m_stream) return; @@ -3261,6 +3262,9 @@ class basic_json ssize_t offset_marker = m_marker - m_start; ssize_t offset_cursor = m_cursor - m_start; + // The parser generator expects a minimum of n bytes to be appended, + // but by appending a line of data we will never split a token, so + // it should be safe to ignore the parameter. m_buffer.erase(0, offset_start); std::string line; std::getline(*m_stream, line); From 2855c70c27cff13701fd300a60ee6d5d38950b3f Mon Sep 17 00:00:00 2001 From: Aaron Burghardt Date: Sat, 28 Feb 2015 22:36:57 -0500 Subject: [PATCH 09/11] Use inplace configuration for yyfill and disable the parameter to yyfill. --- src/json.hpp.re2c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index 7aad9680..b328880b 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -3191,13 +3191,13 @@ class basic_json // remember the begin of the token m_start = m_cursor; -#define YYFILL(n) { yyfill(n); } - /*!re2c re2c:define:YYCTYPE = lexer_char_t; re2c:define:YYCURSOR = m_cursor; re2c:define:YYLIMIT = m_limit; re2c:define:YYMARKER = m_marker; + re2c:define:YYFILL = "{ yyfill(); }"; + re2c:yyfill:parameter = 0; re2c:indent:string = " "; re2c:indent:top = 1; re2c:labelprefix = "basic_json_parser_"; @@ -3254,7 +3254,7 @@ class basic_json } /// append data from the stream to the internal buffer - void yyfill(int n) noexcept + void yyfill() noexcept { if (not m_stream or not *m_stream) return; @@ -3262,9 +3262,6 @@ class basic_json ssize_t offset_marker = m_marker - m_start; ssize_t offset_cursor = m_cursor - m_start; - // The parser generator expects a minimum of n bytes to be appended, - // but by appending a line of data we will never split a token, so - // it should be safe to ignore the parameter. m_buffer.erase(0, offset_start); std::string line; std::getline(*m_stream, line); From 87746280cafe739107376861f3feafc600289281 Mon Sep 17 00:00:00 2001 From: Aaron Burghardt Date: Sun, 1 Mar 2015 06:21:47 -0500 Subject: [PATCH 10/11] Added parse() for streams. --- src/json.hpp.re2c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index b328880b..7f7c098c 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -1716,6 +1716,12 @@ class basic_json return parser(s).parse(); } + /// deserialize from stream + static basic_json parse(std::istream& i) + { + return parser(i).parse(); + } + /// deserialize from stream friend std::istream& operator>>(std::istream& i, basic_json& j) { From 396f64a0585a5dfc16a384a4845d64f672b950ec Mon Sep 17 00:00:00 2001 From: Aaron Burghardt Date: Mon, 2 Mar 2015 15:25:09 -0500 Subject: [PATCH 11/11] Replaced leading tabs with spaces (4 per tab). --- src/json.hpp.re2c | 54 +++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index 7f7c098c..ea39159a 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -3052,18 +3052,18 @@ class basic_json using lexer_char_t = unsigned char; /// constructor with a given buffer - inline lexer(const string_t& s) noexcept - : m_buffer(s), m_stream(nullptr) - { - m_content = reinterpret_cast(s.c_str()); - m_start = m_cursor = m_content; - m_limit = m_content + s.size(); - } - inline lexer(std::istream* s) noexcept + inline lexer(const string_t& s) noexcept + : m_buffer(s), m_stream(nullptr) + { + m_content = reinterpret_cast(s.c_str()); + m_start = m_cursor = m_content; + m_limit = m_content + s.size(); + } + inline lexer(std::istream* s) noexcept : m_stream(s) { - getline(*m_stream, m_buffer); - m_content = reinterpret_cast(m_buffer.c_str()); + getline(*m_stream, m_buffer); + m_content = reinterpret_cast(m_buffer.c_str()); m_start = m_cursor = m_content; m_limit = m_content + m_buffer.size(); } @@ -3259,26 +3259,26 @@ class basic_json } - /// append data from the stream to the internal buffer - void yyfill() noexcept - { - if (not m_stream or not *m_stream) return; + /// append data from the stream to the internal buffer + void yyfill() noexcept + { + if (not m_stream or not *m_stream) return; - ssize_t offset_start = m_start - m_content; - ssize_t offset_marker = m_marker - m_start; - ssize_t offset_cursor = m_cursor - m_start; + ssize_t offset_start = m_start - m_content; + ssize_t offset_marker = m_marker - m_start; + ssize_t offset_cursor = m_cursor - m_start; - m_buffer.erase(0, offset_start); - std::string line; - std::getline(*m_stream, line); - m_buffer += line; + m_buffer.erase(0, offset_start); + std::string line; + std::getline(*m_stream, line); + m_buffer += line; - m_content = reinterpret_cast(m_buffer.c_str()); - m_start = m_content; - m_marker = m_start + offset_marker; - m_cursor = m_start + offset_cursor; - m_limit = m_start + m_buffer.size() - 1; - } + m_content = reinterpret_cast(m_buffer.c_str()); + m_start = m_content; + m_marker = m_start + offset_marker; + m_cursor = m_start + offset_cursor; + m_limit = m_start + m_buffer.size() - 1; + } /// return string representation of last read token inline string_t get_token() const noexcept