From 3a6a1860498041b48f75b3665bf37c6531742033 Mon Sep 17 00:00:00 2001 From: jaseg Date: Thu, 29 Dec 2022 18:30:07 +0100 Subject: Produce adaptive, css-only, single-file, pretty html output --- diff2HtmlCompare.py | 825 +++++++++++++++++++++++++++++----------------------- 1 file changed, 454 insertions(+), 371 deletions(-) (limited to 'diff2HtmlCompare.py') diff --git a/diff2HtmlCompare.py b/diff2HtmlCompare.py index 644ce93..350cc9d 100644 --- a/diff2HtmlCompare.py +++ b/diff2HtmlCompare.py @@ -22,408 +22,491 @@ import io import os +import string +import html +import textwrap import sys import difflib import argparse -import pygments import webbrowser -from pygments.lexers import guess_lexer_for_filename -from pygments.lexer import RegexLexer +from collections import defaultdict +from pathlib import Path +import re +from itertools import groupby, chain + +import pygments from pygments.formatters import HtmlFormatter -from pygments.token import * +from pygments.lexer import RegexLexer +from pygments import token -# Monokai is not quite right yet -PYGMENTS_STYLES = ["vs", "xcode"] -HTML_TEMPLATE = """ +HTML_TEMPLATE = r''' - + - - - %(html_title)s - + $title - - - + + -
-
- %(page_title)s -
-
-
- - -
-
- - -
-
- - -
-
- - -
-
- - -
-
-
-
-
-
- ❬ Original -
-
- 01234567890123456789012345678901234567890123456789012345678901234567890123456789 -
- %(original_code)s -
-
-
- ❭ Modified -
-
- 01234567890123456789012345678901234567890123456789012345678901234567890123456789 -
- %(modified_code)s -
-
- - +$body -""" - - -class DefaultLexer(RegexLexer): - """ - Simply lex each line as a token. - """ - - name = 'Default' - aliases = ['default'] - filenames = ['*'] +''' + +PYGMENTS_CSS = ''' +body .hll { background-color: #ffffcc } +body { background: #ffffff; } +body .c { color: #177500 } /* Comment */ +body .err { color: #000000 } /* Error */ +body .k { color: #A90D91 } /* Keyword */ +body .l { color: #1C01CE } /* Literal */ +body .n { color: #000000 } /* Name */ +body .o { color: #000000 } /* Operator */ +body .cm { color: #177500 } /* Comment.Multiline */ +body .cp { color: #633820 } /* Comment.Preproc */ +body .c1 { color: #177500 } /* Comment.Single */ +body .cs { color: #177500 } /* Comment.Special */ +body .kc { color: #A90D91 } /* Keyword.Constant */ +body .kd { color: #A90D91 } /* Keyword.Declaration */ +body .kn { color: #A90D91 } /* Keyword.Namespace */ +body .kp { color: #A90D91 } /* Keyword.Pseudo */ +body .kr { color: #A90D91 } /* Keyword.Reserved */ +body .kt { color: #A90D91 } /* Keyword.Type */ +body .ld { color: #1C01CE } /* Literal.Date */ +body .m { color: #1C01CE } /* Literal.Number */ +body .s { color: #C41A16 } /* Literal.String */ +body .na { color: #836C28 } /* Name.Attribute */ +body .nb { color: #A90D91 } /* Name.Builtin */ +body .nc { color: #3F6E75 } /* Name.Class */ +body .no { color: #000000 } /* Name.Constant */ +body .nd { color: #000000 } /* Name.Decorator */ +body .ni { color: #000000 } /* Name.Entity */ +body .ne { color: #000000 } /* Name.Exception */ +body .nf { color: #000000 } /* Name.Function */ +body .nl { color: #000000 } /* Name.Label */ +body .nn { color: #000000 } /* Name.Namespace */ +body .nx { color: #000000 } /* Name.Other */ +body .py { color: #000000 } /* Name.Property */ +body .nt { color: #000000 } /* Name.Tag */ +body .nv { color: #000000 } /* Name.Variable */ +body .ow { color: #000000 } /* Operator.Word */ +body .mb { color: #1C01CE } /* Literal.Number.Bin */ +body .mf { color: #1C01CE } /* Literal.Number.Float */ +body .mh { color: #1C01CE } /* Literal.Number.Hex */ +body .mi { color: #1C01CE } /* Literal.Number.Integer */ +body .mo { color: #1C01CE } /* Literal.Number.Oct */ +body .sb { color: #C41A16 } /* Literal.String.Backtick */ +body .sc { color: #2300CE } /* Literal.String.Char */ +body .sd { color: #C41A16 } /* Literal.String.Doc */ +body .s2 { color: #C41A16 } /* Literal.String.Double */ +body .se { color: #C41A16 } /* Literal.String.Escape */ +body .sh { color: #C41A16 } /* Literal.String.Heredoc */ +body .si { color: #C41A16 } /* Literal.String.Interpol */ +body .sx { color: #C41A16 } /* Literal.String.Other */ +body .sr { color: #C41A16 } /* Literal.String.Regex */ +body .s1 { color: #C41A16 } /* Literal.String.Single */ +body .ss { color: #C41A16 } /* Literal.String.Symbol */ +body .bp { color: #5B269A } /* Name.Builtin.Pseudo */ +body .vc { color: #000000 } /* Name.Variable.Class */ +body .vg { color: #000000 } /* Name.Variable.Global */ +body .vi { color: #000000 } /* Name.Variable.Instance */ +body .il { color: #1C01CE } /* Literal.Number.Integer.Long */ + +/* +These styles are used to highlight each diff line. +Note: for partial like highlight change to "display:block-inline" +*/ +span.left_diff_change { + background-color: #FFE5B5; + display: block +} +span.left_diff_add { + background-color: #eeeeee; + display: block +} +span.left_diff_del { + background-color: #ffdddd; + display: block +} +span.lineno_q { + display: block; +} +span.right_diff_change { + background-color: #FFE5B5; + display: block +} +span.right_diff_add { + background-color: #ddffdd; + display: block +} +span.right_diff_del { + background-color: #eeeeee; + display: block +} +span.clearbg { + background-color: transparent; +} +''' + +class SexprLexer(RegexLexer): + name = 'KiCad S-Expression' + aliases = ['sexp'] + filenames = ['*.kicad_mod', '*.kicad_sym'] tokens = { 'root': [ - (r'.*\n', Text), + (r'\s+', token.Whitespace), + (r'[()]', token.Punctuation), + (r'([+-]?\d+\.\d+)(?=[)\s])', token.Number), + (r'(-?\d+)(?=[)\s])', token.Number), + (r'"((?:[^"]|\\")*)"(?=[)\s])', token.String), + (r'([^()"\s]+)(?=[)\s])', token.Name), ] } - -class DiffHtmlFormatter(HtmlFormatter): - """ - Formats a single source file with pygments and adds diff highlights based on the - diff details given. - """ - isLeft = False - diffs = None - - def __init__(self, isLeft, diffs, *args, **kwargs): - self.isLeft = isLeft - self.diffs = diffs - super(DiffHtmlFormatter, self).__init__(*args, **kwargs) - - def wrap(self, source, outfile): - return self._wrap_code(source) - - def getDiffLineNos(self): - retlinenos = [] - for idx, ((left_no, left_line), (right_no, right_line), change) in enumerate(self.diffs): - no = None - if self.isLeft: - if change: - if isinstance(left_no, int) and isinstance(right_no, int): - no = '' + \ - str(left_no) + "" - elif isinstance(left_no, int) and not isinstance(right_no, int): - no = '' + \ - str(left_no) + "" - elif not isinstance(left_no, int) and isinstance(right_no, int): - no = ' ' +from pygments.formatter import Formatter +from pygments.token import STANDARD_TYPES + +from functools import lru_cache + +@lru_cache(maxsize=256) +def get_token_class(ttype): + while not (name := STANDARD_TYPES.get(ttype)): + ttype = ttype.parent + return name + +def iter_token_lines(tokensource): + lineno = 1 + for ttype, value in tokensource: + left, newline, right = value.partition('\n') + while newline: + yield lineno, ttype, left + lineno += 1 + left, newline, right = right.partition('\n') + if left != '': + yield lineno, ttype, left + +class RecordFormatter(Formatter): + def __init__(self, side, diff): + self.side = side + if side == 'right': + diff = [(right, left, change) for left, right, change in diff] + self.diff = diff + + def format(self, tokensource, outfile): + diff = iter(self.diff) + self.lines = [] + for lineno, tokens in groupby(iter_token_lines(tokensource), key=lambda arg: arg[0]): + + for (lineno_ours, diff_ours), (lineno_theirs, _diff_theirs), change in diff: + if lineno_ours == lineno: + break else: - no = '' + str(left_no) + "" + self.lines.append(f'') + assert lineno_ours == lineno + + if not change: + change_class = '' + elif not lineno_ours or not lineno_theirs: + change_class = ' insert' else: - if change: - if isinstance(left_no, int) and isinstance(right_no, int): - no = '' + \ - str(right_no) + "" - elif isinstance(left_no, int) and not isinstance(right_no, int): - no = ' ' - elif not isinstance(left_no, int) and isinstance(right_no, int): - no = '' + \ - str(right_no) + "" - else: - no = '' + str(right_no) + "" - - retlinenos.append(no) - - return retlinenos - - def _wrap_code(self, source): - source = list(source) - yield 0, '
'
-
-        for idx, ((left_no, left_line), (right_no, right_line), change) in enumerate(self.diffs):
-            # print idx, ((left_no, left_line),(right_no, right_line),change)
-            try:
-                if self.isLeft:
-                    if change:
-                        if isinstance(left_no, int) and isinstance(right_no, int) and left_no <= len(source):
-                            i, t = source[left_no - 1]
-                            t = '' + t + ""
-                        elif isinstance(left_no, int) and not isinstance(right_no, int) and left_no <= len(source):
-                            i, t = source[left_no - 1]
-                            t = '' + t + ""
-                        elif not isinstance(left_no, int) and isinstance(right_no, int):
-                            i, t = 1, left_line
-                            t = '' + t + ""
-                        else:
-                            raise
-                    else:
-                        if left_no <= len(source):
-                            i, t = source[left_no - 1]
-                        else:
-                            i = 1
-                            t = left_line
-                else:
-                    if change:
-                        if isinstance(left_no, int) and isinstance(right_no, int) and right_no <= len(source):
-                            i, t = source[right_no - 1]
-                            t = '' + t + ""
-                        elif isinstance(left_no, int) and not isinstance(right_no, int):
-                            i, t = 1, right_line
-                            t = '' + t + ""
-                        elif not isinstance(left_no, int) and isinstance(right_no, int) and right_no <= len(source):
-                            i, t = source[right_no - 1]
-                            t = '' + t + ""
-                        else:
-                            raise
+                change_class = ' change' 
+
+            line = f'{lineno}'
+
+            parts = re.split(r'(\00.|\01|$)', diff_ours)
+            source_pos = 0
+            diff_markers = []
+            if lineno_theirs: # Do not highlight word changes if the whole line got added or removed.
+                for span, sep in zip(parts[0:-2:2], parts[1:-2:2]):
+                    source_pos += len(span)
+                    diff_markers.append((source_pos, sep))
+
+            diff_class = ''
+            source_pos = 0
+            for _lineno, ttype, value in tokens:
+                css_class = get_token_class(ttype)
+
+                while diff_markers:
+                    next_marker_pos, next_marker_type = diff_markers[0]
+                    if source_pos <= next_marker_pos < source_pos + len(value):
+                        split_pos = next_marker_pos - source_pos
+                        left, value = value[:split_pos], value[split_pos:]
+                        line += f'{html.escape(left)}'
+                        source_pos += len(left)
+                        diff_class = ' word_change' if next_marker_type.startswith('\0') else ''
+                        diff_markers = diff_markers[1:]
                     else:
-                        if right_no <= len(source):
-                            i, t = source[right_no - 1]
-                        else:
-                            i = 1
-                            t = right_line
-                yield i, t
-            except:
-                # print "WARNING! failed to enumerate diffs fully!"
-                pass  # this is expected sometimes
-        yield 0, '\n
' - - def _wrap_tablelinenos(self, inner): - dummyoutfile = io.StringIO() - lncount = 0 - for t, line in inner: - if t: - lncount += 1 - - # compatibility Python v2/v3 - if sys.version_info > (3,0): - dummyoutfile.write(line) - else: - dummyoutfile.write(unicode(line)) - - fl = self.linenostart - mw = len(str(lncount + fl - 1)) - sp = self.linenospecial - st = self.linenostep - la = self.lineanchors - aln = self.anchorlinenos - nocls = self.noclasses - - lines = [] - for i in self.getDiffLineNos(): - lines.append('%s' % (i,)) - - ls = ''.join(lines) - - # in case you wonder about the seemingly redundant
here: since the - # content in the other cell also is wrapped in a div, some browsers in - # some configurations seem to mess up the formatting... - if nocls: - yield 0, ('' % self.cssclass + - '
' - '
' +
-                      ls + '
') - else: - yield 0, ('' % self.cssclass + - '
' +
-                      ls + '
') - yield 0, dummyoutfile.getvalue() - yield 0, '
' - - -class CodeDiff(object): - """ - Manages a pair of source files and generates a single html diff page comparing - the contents. - """ - pygmentsCssFile = "./deps/codeformats/%s.css" - diffCssFile = "./deps/diff.css" - diffJsFile = "./deps/diff.js" - resetCssFile = "./deps/reset.css" - jqueryJsFile = "./deps/jquery.min.js" - - def __init__(self, fromfile, tofile, fromtxt=None, totxt=None, name=None): - self.filename = name - self.fromfile = fromfile - if fromtxt == None: - try: - with io.open(fromfile) as f: - self.fromlines = f.readlines() - except Exception as e: - print("Problem reading file %s" % fromfile) - print(e) - sys.exit(1) - else: - self.fromlines = [n + "\n" for n in fromtxt.split("\n")] - self.leftcode = "".join(self.fromlines) - - self.tofile = tofile - if totxt == None: - try: - with io.open(tofile) as f: - self.tolines = f.readlines() - except Exception as e: - print("Problem reading file %s" % tofile) - print(e) - sys.exit(1) - else: - self.tolines = [n + "\n" for n in totxt.split("\n")] - self.rightcode = "".join(self.tolines) - - def getDiffDetails(self, fromdesc='', todesc='', context=False, numlines=5, tabSize=8): - # change tabs to spaces before it gets more difficult after we insert - # markkup - def expand_tabs(line): - # hide real spaces - line = line.replace(' ', '\0') - # expand tabs into spaces - line = line.expandtabs(tabSize) - # replace spaces from expanded tabs back into tab characters - # (we'll replace them with markup after we do differencing) - line = line.replace(' ', '\t') - return line.replace('\0', ' ').rstrip('\n') - - self.fromlines = [expand_tabs(line) for line in self.fromlines] - self.tolines = [expand_tabs(line) for line in self.tolines] - - # create diffs iterator which generates side by side from/to data - if context: - context_lines = numlines - else: - context_lines = None - - diffs = difflib._mdiff(self.fromlines, self.tolines, context_lines, - linejunk=None, charjunk=difflib.IS_CHARACTER_JUNK) - return list(diffs) - - def format(self, options): - self.diffs = self.getDiffDetails(self.fromfile, self.tofile) - - if options.verbose: - for diff in self.diffs: - print("%-6s %-80s %-80s" % (diff[2], diff[0], diff[1])) - - fields = ((self.leftcode, True, self.fromfile), - (self.rightcode, False, self.tofile)) - - codeContents = [] - for (code, isLeft, filename) in fields: - - inst = DiffHtmlFormatter(isLeft, - self.diffs, - nobackground=False, - linenos=True, - style=options.syntax_css) - - try: - self.lexer = guess_lexer_for_filename(self.filename, code) - - except pygments.util.ClassNotFound: - if options.verbose: - print("No Lexer Found! Using default...") - - self.lexer = DefaultLexer() - - formatted = pygments.highlight(code, self.lexer, inst) - - codeContents.append(formatted) - - answers = { - "html_title": self.filename, - "reset_css": self.resetCssFile, - "pygments_css": self.pygmentsCssFile % options.syntax_css, - "diff_css": self.diffCssFile, - "page_title": self.filename, - "original_code": codeContents[0], - "modified_code": codeContents[1], - "jquery_js": self.jqueryJsFile, - "diff_js": self.diffJsFile, - "page_width": "page-80-width" if options.print_width else "page-full-width" - } - - self.htmlContents = HTML_TEMPLATE % answers - - def write(self, path): - fh = io.open(path, 'w') - fh.write(self.htmlContents) - fh.close() - - -def main(file1, file2, outputpath, options): - codeDiff = CodeDiff(file1, file2, name=file2) - codeDiff.format(options) - codeDiff.write(outputpath) - -def show(outputpath): - path = os.path.abspath(outputpath) - webbrowser.open('file://' + path) + break + line += f'{html.escape(value)}' + source_pos += len(value) + + if css_class is not None: + line += '' + + line += '' + self.lines.append(line) + + for _ours_empty, (lineno_theirs, _diff_theirs), change in diff: + self.lines.append(f'') + assert change and lineno_theirs + +def html_diff_content(old, new): + diff = list(difflib._mdiff(old.splitlines(), new.splitlines())) + + fmt_l = RecordFormatter('left', diff) + pygments.highlight(old, SexprLexer(), fmt_l) + + fmt_r = RecordFormatter('right', diff) + pygments.highlight(new, SexprLexer(), fmt_r) + + return '\n'.join(chain.from_iterable(zip(fmt_l.lines, fmt_r.lines))) + +def html_diff_block(old, new, filename): + code = html_diff_content(old, new) + return textwrap.dedent(f'''
+
{filename}
+
+ {code} +
+
''') + if __name__ == "__main__": - description = """Given two source files this application\ + description = """Given two source files or directories this application\ creates an html page which highlights the differences between the two. """ parser = argparse.ArgumentParser(description=description) - parser.add_argument('-s', '--show', action='store_true', - help='show html in a browser.') - parser.add_argument('-p', '--print-width', action='store_true', - help='Restrict code to 80 columns wide. (printer friendly in landscape)') - parser.add_argument('-c', '--syntax-css', action='store', default="vs", - help='Pygments CSS for code syntax highlighting. Can be one of: %s' % str(PYGMENTS_STYLES)) - parser.add_argument('-v', '--verbose', action='store_true', help='show verbose output.') - parser.add_argument('file1', help='source file to compare ("before" file).') - parser.add_argument('file2', help='source file to compare ("after" file).') - + parser.add_argument('-b', '--open', action='store_true', help='Open output file in a browser') + parser.add_argument('-s', '--syntax-css', help='Path to custom Pygments CSS file for code syntax highlighting') + parser.add_argument('-t', '--pagetitle', help='Override page title of output HTML file') + parser.add_argument('-o', '--output', default=sys.stdout, type=argparse.FileType('w'), help='Name of output file (default: stdout)') + parser.add_argument('--header', action='store_true', help='Only output HTML header with stylesheets and stuff, and no diff') + parser.add_argument('--content', action='store_true', help='Only output HTML content, without header') + parser.add_argument('old', help='source file or directory to compare ("before" file)') + parser.add_argument('new', help='source file or directory to compare ("after" file)') args = parser.parse_args() - if args.syntax_css not in PYGMENTS_STYLES: - raise ValueError("Syntax CSS (-c) must be one of %r." % PYGMENTS_STYLES) + if args.open and args.output == sys.stdout: + print('Error: --open requires --output to be given.') + parser.print_usage() + sys.exit(2) + + old, new = Path(args.old), Path(args.new) + if not old.exists(): + print(f'Error: Path "{old}" does not exist.') + sys.exit(1) + + if not new.exists(): + print(f'Error: Path "{new}" does not exist.') + sys.exit(1) + + if old.is_file() != new.is_file(): + print(f'Error: You must give either two files, or two paths to compare, not a mix of both.') + sys.exit(1) + + if old.is_file(): + found_files = {str(new): (old, new)} + else: + found_files = defaultdict(lambda: [None, None]) + for fn in old.glob('**/*'): + found_files[str(fn.relative_to(old))][0] = fn + for fn in new.glob('**/*'): + found_files[str(fn.relative_to(new))][1] = fn + + pagetitle = args.pagetitle or f'diff: {old} / {new}' + if args.syntax_css: + syntax_css = Path(args.syntax_css).read_text() + else: + syntax_css = PYGMENTS_CSS + + diff_blocks = [] + for suffix, (old, new) in sorted(found_files.items()): + old = '' if old is None else old.read_text() + new = '' if new is None else new.read_text() + + diff_blocks.append(html_diff_block(old, new, suffix)) + + print(string.Template(HTML_TEMPLATE).substitute( + title=pagetitle, + pygments_css=syntax_css, + body='\n'.join(diff_blocks)), file=args.output) + + if args.open: + webbrowser.open('file://' + str(Path(args.output.name).absolute())) - outputpath = "index.html" - main(args.file1, args.file2, outputpath, args) - if args.show: - show(outputpath) -- cgit