#!/usr/bin/env python3
"""Turn ``git diff --word-diff`` output for a LaTeX file into coloured LaTeX.

The script diffs a ``.tex`` file and its bibliography against a git revision
and prints, on stdout, the LaTeX source in which changed words inside the
document body are wrapped in ``\\color{diffred}`` / ``\\color{diffgreen}``
switches.  Changes in the preamble are applied silently (additions kept,
deletions dropped).  Bibliography entries whose ``@type{key,`` line was added
or removed are placed into biblatex categories so they can be coloured too.
"""
import re
import sys
import subprocess
import string

import click

# Word-diff markers wrapped in ANSI colour sequences: ``ESC[32m{+...+}ESC[m``
# for additions and ``ESC[31m[-...-]ESC[m`` for deletions.
# NOTE(review): these are presumably git's default diff colours; a customised
# ``color.diff.*`` configuration would not match -- confirm.
_ADDITION_PATTERN = '\033\\[32m\\{\\+([^\033]*?)\\+\\}\033\\[m'
_DELETION_PATTERN = '\033\\[31m\\[-([^\033]*?)\\-]\033\\[m'
_ADDITION_RE = re.compile(_ADDITION_PATTERN)
_DELETION_RE = re.compile(_DELETION_PATTERN)
# A deletion immediately followed by an addition, i.e. a replaced word.
_REPLACEMENT_RE = re.compile(f'{_DELETION_PATTERN}{_ADDITION_PATTERN}')
# Any remaining ``ESC[...m`` colour sequence.
_CSI_RE = re.compile('\033\\[.*?m')
# First line of a BibTeX entry, e.g. ``@article{key,``; group 1 is the key.
_BIBTEX_ENTRY_RE = re.compile('@.*?{(.*?),')
# An unescaped ``%`` (preceded by an even number of backslashes, kept in
# group 1) together with everything after it.
_TEX_COMMENT_RE = re.compile(r'(?<!\\)((?:\\\\)*)%.*', re.DOTALL)

_RED = r' \color{diffred}'
_GREEN = r' \color{diffgreen}'
_BLACK = r' \color{black}'


def _strip_tex_comment(text):
    """Return *text* cut off at the first unescaped ``%``.

    The comment is dropped so that a colour switch appended after the text is
    not swallowed by it.  An escaped ``\\%`` is a literal percent sign and is
    kept.
    """
    return _TEX_COMMENT_RE.sub(r'\1', text, count=1)


def _braces_balanced(text):
    """Return True if *text* contains as many ``{`` as ``}``."""
    return text.count('{') == text.count('}')


def _is_substantial_replacement(old, new):
    """Decide whether replacing *old* by *new* is worth highlighting.

    Only ASCII letters are compared, as multisets.  The replacement counts as
    substantial when at least three letters of *old* are missing from *new*,
    or when *new* contains any letter that is not accounted for by *old*.
    """
    unmatched_new = list(new)
    missing = 0
    for char in old:
        if char not in string.ascii_letters:
            continue
        if char in unmatched_new:
            unmatched_new.remove(char)
        else:
            missing += 1
            if missing >= 3:
                return True
    return any(char in string.ascii_letters for char in unmatched_new)


def _render_replacement(match):
    """Render a deletion+addition pair (groups: old text, new text)."""
    old, new = (_strip_tex_comment(part) for part in match.groups())
    # Colouring a fragment with unbalanced braces would put the colour
    # switches in a different group than intended; just take the new text.
    if not (_braces_balanced(old) and _braces_balanced(new)):
        return new
    if _is_substantial_replacement(old, new):
        return _RED + old + _GREEN + new + _BLACK
    return new


def _render_addition(match):
    """Render a stand-alone addition in green (plain if braces unbalanced)."""
    added = _strip_tex_comment(match.group(1))
    if not _braces_balanced(added):
        return added
    return _GREEN + added + _BLACK


def _render_deletion(match):
    """Render a stand-alone deletion in red (dropped if braces unbalanced)."""
    removed = _strip_tex_comment(match.group(1))
    if not _braces_balanced(removed):
        return ''
    return _RED + removed + _BLACK


def _git_word_diff(path, revision):
    """Return the coloured word diff of *path* against *revision* as text.

    The context size is set to the file's line count plus one so that the
    diff of a changed file contains the complete file.

    Raises:
        OSError: if *path* cannot be opened.
        subprocess.CalledProcessError: if ``git diff`` exits non-zero; git's
            captured stderr is echoed to stderr first.
    """
    # Binary mode: only newlines are counted, so the file's text encoding
    # cannot make this step fail.
    with open(path, 'rb') as handle:
        line_count = sum(1 for _ in handle)
    try:
        proc = subprocess.run(
            ['git', 'diff', f'-U{line_count + 1}', '--word-diff',
             '--color=always', revision, '--', path],
            check=True, capture_output=True)
    except subprocess.CalledProcessError as err:
        # capture_output would otherwise hide git's own error message.
        sys.stderr.write(err.stderr.decode(errors='replace'))
        raise
    return proc.stdout.decode()


def _bibliography_preamble(bib_diff):
    """Build the biblatex category declarations from the bibliography diff.

    A diff line counts as a new (deleted) entry when the whole stripped line
    is one addition (deletion) marker whose content is a BibTeX entry header.
    The returned text ends with a newline.
    """
    lines = [
        '\\DeclareBibliographyCategory{diff_new_entry}',
        '\\DeclareBibliographyCategory{diff_deleted_entry}',
        '\\AtEveryBibitem{\\ifcategory{diff_new_entry}{\\color{diffgreen}}{\\ifcategory{diff_deleted_entry}{\\color{diffred}}{\\color{black}}}}',
    ]
    added_entries, removed_entries = [], []
    for raw_line in bib_diff.splitlines():
        stripped = raw_line.strip()
        for marker_re, bucket in ((_ADDITION_RE, added_entries),
                                  (_DELETION_RE, removed_entries)):
            marker = marker_re.fullmatch(stripped)
            if marker is None:
                continue
            entry = _BIBTEX_ENTRY_RE.fullmatch(marker.group(1))
            if entry is not None:
                bucket.append(entry.group(1))
    if added_entries:
        lines.append('\\addtocategory{diff_new_entry}{' + ','.join(added_entries) + '}')
    if removed_entries:
        lines.append('\\addtocategory{diff_deleted_entry}{' + ','.join(removed_entries) + '}')
    return ''.join(line + '\n' for line in lines)


def _annotate_tex_diff(tex_diff, preamble_extras):
    """Yield the output lines for the word diff *tex_diff*.

    Everything up to and including the first line containing ``@@`` is
    skipped.  Until a line containing ``\\begin{document}`` has been seen,
    additions are kept and deletions dropped without colouring;
    *preamble_extras* and the two colour definitions are emitted right before
    that line.  Afterwards changes are coloured.  Leftover colour escape
    sequences are removed from every emitted diff line.
    """
    content_started = False
    document_started = False
    for line in tex_diff.splitlines():
        if not content_started:
            content_started = '@@' in line
            continue

        line = line.rstrip()
        if document_started:
            # Replacements first, so the stand-alone patterns below only see
            # what is left over.
            line = _REPLACEMENT_RE.sub(_render_replacement, line)
            line = _ADDITION_RE.sub(_render_addition, line)
            line = _DELETION_RE.sub(_render_deletion, line)
        else:
            if '\\begin{document}' in line:
                document_started = True
                yield preamble_extras
                yield '\\definecolor{diffgreen}{HTML}{1e8449}'
                yield '\\definecolor{diffred}{HTML}{cb4335}'
            line = _ADDITION_RE.sub(r'\1', line)
            line = _DELETION_RE.sub('', line)

        yield _CSI_RE.sub('', line)


@click.command()
@click.argument('texfile')
@click.argument('bibliography')
@click.argument('revision')
def generate_git_tex_diff(texfile, bibliography, revision):
    """Print TEXFILE with its changes since REVISION highlighted in colour.

    BIBLIOGRAPHY is diffed against REVISION as well; entries added or removed
    there are put into biblatex categories that are coloured accordingly.
    The result is written to standard output.  If git reports no diff for
    TEXFILE, nothing is printed.
    """
    tex_diff = _git_word_diff(texfile, revision)
    bib_diff = _git_word_diff(bibliography, revision)
    preamble_extras = _bibliography_preamble(bib_diff)
    for out_line in _annotate_tex_diff(tex_diff, preamble_extras):
        print(out_line)


if __name__ == '__main__':
    generate_git_tex_diff()