➕ added amalgamate Python script

2018-01-13 10:59:49 +01:00 · 2018-01-13 10:59:49 +01:00 · f4a55f26b0
commit f4a55f26b0
parent 85173f5627
5 changed files with 396 additions and 2 deletions
--- a/.gitignore
+++ b/.gitignore
@ -21,6 +21,4 @@ benchmarks/files/numbers/*.json
 cmake-build-debug

 test/test-*
-amalgamate
-single_include
 third_party/Amalgamate
--- a/develop/amalgamate/LICENSE.md
+++ b/develop/amalgamate/LICENSE.md
@ -0,0 +1,27 @@
+amalgamate.py - Amalgamate C source and header files
+Copyright (c) 2012, Erik Edlund <erik.edlund@32767.se>
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+ * Neither the name of Erik Edlund, nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/develop/amalgamate/README.md
+++ b/develop/amalgamate/README.md
@ -0,0 +1,66 @@
+
+# amalgamate.py - Amalgamate C source and header files
+
+Origin: https://bitbucket.org/erikedlund/amalgamate
+
+Mirror: https://github.com/edlund/amalgamate
+
+`amalgamate.py` aims to make it easy to use SQLite-style C source and header
+amalgamation in projects.
+
+For more information, please refer to: http://sqlite.org/amalgamation.html
+
+## Here be dragons
+
+`amalgamate.py` is quite dumb: it only knows the bare minimum about C code
+required to handle trivial include directives. It can produce weird results
+for unexpected code.
+
+Things to be aware of:
+
+`amalgamate.py` will not handle complex include directives correctly:
+
+        #define HEADER_PATH "path/to/header.h"
+        #include HEADER_PATH
+
+In the above example, `path/to/header.h` will not be included in the
+amalgamation (HEADER_PATH is never expanded).
+
+`amalgamate.py` makes the assumption that each source and header file which
+is not empty will end in a new-line character, which is not immediately
+preceded by a backslash character (see 5.1.1.2p1.2 of ISO C99).
+
+`amalgamate.py` should be usable with C++ code, but raw string literals from
+C++11 will definitely cause problems:
+
+        R"delimiter(Terrible raw \ data " #include <sneaky.hpp>)delimiter"
+        R"delimiter(Terrible raw \ data " escaping)delimiter"
+
+In the examples above, `amalgamate.py` will stop parsing the raw string literal
+when it encounters the first quotation mark, which will produce unexpected
+results.
+
+## Installing amalgamate.py
+
+Python 2.7.0 or higher is required.
+
+`amalgamate.py` can be tested and installed using the following commands:
+
+        ./test.sh && sudo -k cp ./amalgamate.py /usr/local/bin/
+
+## Using amalgamate.py
+
+        amalgamate.py [-v] -c path/to/config.json -s path/to/source/dir \
+                [-p path/to/prologue.(c|h)]
+
+ * The `-c, --config` option should specify the path to a JSON config file which
+   lists the source files, include paths and where to write the resulting
+   amalgamation. Have a look at `test/source.c.json` and `test/include.h.json`
+   to see two examples.
+
+ * The `-s, --source` option should specify the path to the source directory.
+   This is useful for supporting separate source and build directories.
+
+ * The `-p, --prologue` option should specify the path to a file which will be
+   added to the beginning of the amalgamation. It is optional.
+
--- a/develop/amalgamate/amalgamate.py
+++ b/develop/amalgamate/amalgamate.py
@ -0,0 +1,295 @@
+#!/usr/bin/env python
+
+# amalgamate.py - Amalgamate C source and header files.
+# Copyright (c) 2012, Erik Edlund <erik.edlund@32767.se>
+# 
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+# 
+#  * Redistributions of source code must retain the above copyright notice,
+#  this list of conditions and the following disclaimer.
+# 
+#  * Redistributions in binary form must reproduce the above copyright notice,
+#  this list of conditions and the following disclaimer in the documentation
+#  and/or other materials provided with the distribution.
+# 
+#  * Neither the name of Erik Edlund, nor the names of its contributors may
+#  be used to endorse or promote products derived from this software without
+#  specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import argparse
+import datetime
+import json
+import os
+import re
+import sys
+
+# Holds the settings of one amalgamation run and writes the target file.
+#
+# Every top-level key of the JSON config file becomes an attribute of the
+# instance. The methods below read "target", "sources" and "include_paths"
+# from there; "verbose", "prologue" and "source_path" come from the parsed
+# command line arguments and are assigned after the config keys.
+class Amalgamation(object):
+
+	# Return file_path unchanged when it is absolute, otherwise return it
+	# joined onto self.source_path.
+	def actual_path(self, file_path):
+		if not os.path.isabs(file_path):
+			file_path = os.path.join(self.source_path, file_path)
+		return file_path
+
+	# Search for the included file_path in source_dir (searched first,
+	# when given) and then in each entry of self.include_paths, in order.
+	#
+	# Returns the first joined search path which names an existing file
+	# once resolved through actual_path(), or None when nothing matches.
+	def find_included_file(self, file_path, source_dir):
+		# Work on a copy so that self.include_paths is never modified.
+		search_dirs = self.include_paths[:]
+		if source_dir:
+			search_dirs.insert(0, source_dir)
+
+		for search_dir in search_dirs:
+			search_path = os.path.join(search_dir, file_path)
+			if os.path.isfile(self.actual_path(search_path)):
+				return search_path
+		return None
+
+	# Load the JSON config named by args.config and copy the relevant
+	# command line arguments (verbose, prologue, source_path) onto self.
+	def __init__(self, args):
+		# Only the parsing needs the file, close it before going on.
+		with open(args.config, 'r') as f:
+			config = json.load(f)
+
+		for key in config:
+			setattr(self, key, config[key])
+
+		self.verbose = args.verbose == "yes"
+		self.prologue = args.prologue
+		self.source_path = args.source_path
+		# Paths of all files turned into a TranslationUnit so far;
+		# TranslationUnit appends to this list.
+		self.included_files = []
+
+	# Generate the amalgamation and write it to the target file.
+	#
+	# The optional prologue comes first, followed by the processed
+	# content of every file listed in self.sources, in order.
+	def generate(self):
+		# Collect the pieces and join them once at the end.
+		parts = []
+
+		if self.prologue:
+			with open(self.prologue, 'r') as f:
+				# The prologue is used as a strftime() format string, so
+				# date and time directives in it are expanded.
+				parts.append(datetime.datetime.now().strftime(f.read()))
+
+		if self.verbose:
+			print("Config:")
+			print(" target        = {0}".format(self.target))
+			print(" working_dir   = {0}".format(os.getcwd()))
+			print(" include_paths = {0}".format(self.include_paths))
+		print("Creating amalgamation:")
+		for file_path in self.sources:
+			# Do not check the include paths while processing the source
+			# list, all given source paths must be correct.
+			# TranslationUnit raises IOError for a missing file.
+			print(" - processing \"{0}\"".format(file_path))
+			t = TranslationUnit(file_path, self, True)
+			parts.append(t.content)
+
+		with open(self.target, 'w') as f:
+			f.write("".join(parts))
+
+		print("...done!\n")
+		if self.verbose:
+			print("Files processed: {0}".format(self.sources))
+			print("Files included: {0}".format(self.included_files))
+		print("")
+
+# One source or header file whose processed text ends up in self.content.
+#
+# Constructing an instance reads the file, removes "#pragma once" from
+# non-root files and replaces every include directive which can be found
+# through the amalgamation with a "// <directive>" comment followed by the
+# processed content of the included file (inserted only the first time
+# that path is seen).
+class TranslationUnit(object):
+
+	# // C++ comment.
+	cpp_comment_pattern = re.compile(r"//.*?\n")
+
+	# /* C comment. */
+	c_comment_pattern = re.compile(r"/\*.*?\*/", re.S)
+
+	# "complex \"stri\\\ng\" value".
+	string_pattern = re.compile("[^']" r'".*?(?<=[^\\])"', re.S)
+
+	# Handle simple include directives. Support for advanced
+	# directives where macros and defines need to be expanded is
+	# not a concern right now.
+	include_pattern = re.compile(
+		r'#\s*include\s+(<|")(?P<path>.*?)("|>)', re.S)
+
+	# #pragma once
+	pragma_once_pattern = re.compile(r'#\s*pragma\s+once', re.S)
+
+	# Search for pattern in self.content starting at index, add the
+	# match to contexts if found and return the index to continue from.
+	#
+	# Note that search() scans forward from index, so the recorded match
+	# does not necessarily begin at index. Without a match the walk
+	# resumes at index + 2.
+	def _search_content(self, index, pattern, contexts):
+		match = pattern.search(self.content, index)
+		if match is None:
+			return index + 2
+		contexts.append(match)
+		return match.end()
+
+	# Return all the skippable contexts, i.e., comments and strings,
+	# as a list of match objects in the order they were found.
+	def _find_skippable_contexts(self):
+		# Find contexts in the content in which a found include
+		# directive should not be processed.
+		skippable_contexts = []
+
+		# Walk through the content char by char, and try to grab
+		# skippable contexts using regular expressions when found.
+		# The walk starts at 1 because each step also looks at the
+		# preceding character.
+		i = 1
+		content_len = len(self.content)
+		while i < content_len:
+			j = i - 1
+			current = self.content[i]
+			previous = self.content[j]
+
+			if current == '"':
+				# String value. The search starts one char early since
+				# string_pattern consumes the char before the quote.
+				i = self._search_content(j, self.string_pattern,
+					skippable_contexts)
+			elif current == '*' and previous == '/':
+				# C style comment.
+				i = self._search_content(j, self.c_comment_pattern,
+					skippable_contexts)
+			elif current == '/' and previous == '/':
+				# C++ style comment.
+				i = self._search_content(j, self.cpp_comment_pattern,
+					skippable_contexts)
+			else:
+				# Skip to the next char.
+				i += 1
+
+		return skippable_contexts
+
+	# Returns True if the match lies strictly inside any of the other
+	# matches, i.e. starts after and ends before one of them.
+	def _is_within(self, match, matches):
+		return any(
+			m.start() < match.start() and match.end() < m.end()
+			for m in matches)
+
+	# Removes every "#pragma once" which is not inside a comment or a
+	# string from self.content.
+	#
+	# Returns the number of removed directives (0 when the content is
+	# too short to be worth scanning).
+	def _process_pragma_once(self):
+		# "#pragma once" is exactly as long as "#include <x>".
+		if len(self.content) < len("#include <x>"):
+			return 0
+
+		# Find contexts in the content in which a found pragma once
+		# directive should not be processed.
+		skippable_contexts = self._find_skippable_contexts()
+
+		pragmas = [
+			m for m in self.pragma_once_pattern.finditer(self.content)
+			if not self._is_within(m, skippable_contexts)]
+
+		# Keep everything between the collected directives.
+		kept = []
+		prev_end = 0
+		for pragma_match in pragmas:
+			kept.append(self.content[prev_end:pragma_match.start()])
+			prev_end = pragma_match.end()
+		kept.append(self.content[prev_end:])
+		self.content = "".join(kept)
+
+		return len(pragmas)
+
+	# Include all trivial #include directives into self.content.
+	#
+	# Directives inside comments or strings and directives whose file
+	# cannot be found by the amalgamation are left untouched. Returns
+	# the number of directives which were replaced.
+	def _process_includes(self):
+		if len(self.content) < len("#include <x>"):
+			return 0
+
+		# Find contexts in the content in which a found include
+		# directive should not be processed.
+		skippable_contexts = self._find_skippable_contexts()
+
+		# Search for include directives in the content, collect those
+		# which should be included into the content.
+		includes = []
+		for include_match in self.include_pattern.finditer(self.content):
+			if self._is_within(include_match, skippable_contexts):
+				continue
+			include_path = include_match.group("path")
+			# Only quoted includes are also searched for next to the
+			# including file.
+			search_same_dir = include_match.group(1) == '"'
+			found_included_path = self.amalgamation.find_included_file(
+				include_path, self.file_dir if search_same_dir else None)
+			if found_included_path:
+				includes.append((include_match, found_included_path))
+
+		# Handle all collected include directives.
+		pieces = []
+		prev_end = 0
+		for include_match, found_included_path in includes:
+			pieces.append(self.content[prev_end:include_match.start()])
+			# The directive itself is kept as a comment.
+			pieces.append("// {0}\n".format(include_match.group(0)))
+			# Checked here rather than while collecting, since the
+			# nested units created below extend included_files.
+			if found_included_path not in self.amalgamation.included_files:
+				t = TranslationUnit(found_included_path, self.amalgamation, False)
+				pieces.append(t.content)
+			prev_end = include_match.end()
+		pieces.append(self.content[prev_end:])
+		self.content = "".join(pieces)
+
+		return len(includes)
+
+	# Make all content processing. "#pragma once" is only stripped from
+	# files which are not roots, i.e. not listed as sources.
+	def _process(self):
+		if not self.is_root:
+			self._process_pragma_once()
+		self._process_includes()
+
+	# Read file_path (resolved through amalgamation.actual_path) and
+	# process its content.
+	#
+	# file_path is appended to amalgamation.included_files before the
+	# file is looked up. Raises IOError if the file does not exist.
+	def __init__(self, file_path, amalgamation, is_root):
+		self.file_path = file_path
+		self.file_dir = os.path.dirname(file_path)
+		self.amalgamation = amalgamation
+		self.is_root = is_root
+
+		self.amalgamation.included_files.append(self.file_path)
+
+		actual_path = self.amalgamation.actual_path(file_path)
+		if not os.path.isfile(actual_path):
+			raise IOError("File not found: \"{0}\"".format(file_path))
+		with open(actual_path, 'r') as f:
+			self.content = f.read()
+		# Processing recurses into the included files, so it runs after
+		# the handle is closed to avoid keeping one open file per level
+		# of the include chain.
+		self._process()
+
+# Parse the command line, build the Amalgamation from it and generate
+# the target file.
+def main():
+	description = "Amalgamate C source and header files."
+	usage = " ".join([
+		"amalgamate.py",
+		"[-v]",
+		"-c path/to/config.json",
+		"-s path/to/source/dir",
+		"[-p path/to/prologue.(c|h)]"
+	])
+	argsparser = argparse.ArgumentParser(
+		description=description, usage=usage)
+
+	# The value is optional so that a bare "-v" works as the usage text
+	# advertises; "-v yes" and "-v no" are still accepted.
+	argsparser.add_argument("-v", "--verbose", dest="verbose",
+		nargs="?", const="yes", default="no",
+		choices=["yes", "no"], metavar="", help="be verbose")
+
+	argsparser.add_argument("-c", "--config", dest="config",
+		required=True, metavar="", help="path to a JSON config file")
+
+	argsparser.add_argument("-s", "--source", dest="source_path",
+		required=True, metavar="", help="source code path")
+
+	argsparser.add_argument("-p", "--prologue", dest="prologue",
+		required=False, metavar="", help="path to a C prologue file")
+
+	amalgamation = Amalgamation(argsparser.parse_args())
+	amalgamation.generate()
+
+# Only run when executed as a script, not when imported.
+if __name__ == "__main__":
+	main()
+
--- a/develop/amalgamate/config.json
+++ b/develop/amalgamate/config.json
@ -0,0 +1,8 @@
+{
+	"project": "JSON for Modern C++",
+	"target": "src/json.hpp",
+	"sources": [
+		"json.hpp"
+	],
+	"include_paths": ["."]
+}