summaryrefslogtreecommitdiff
path: root/paper/diffinator.py
blob: aa1e96941e3fb91ad81083e4db7e50c4f6f61c83 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env python3

import re
import string
import subprocess
import sys
from collections import Counter

import click


# Markers as they appear in `git diff --word-diff --color=always` output:
# an addition is ESC[32m{+...+}ESC[m and a deletion is ESC[31m[-...-]ESC[m.
# Group 1 of each pattern is the changed text itself.
_ADDITION_RE_R = '\033\\[32m\\{\\+([^\033]*?)\\+\\}\033\\[m'
_DELETION_RE_R = '\033\\[31m\\[-([^\033]*?)\\-]\033\\[m'
_ADDITION_RE = re.compile(_ADDITION_RE_R)
_DELETION_RE = re.compile(_DELETION_RE_R)
# A deletion directly followed by an addition, i.e. a replacement (old, new).
_REPLACEMENT_RE = re.compile(f'{_DELETION_RE_R}{_ADDITION_RE_R}')
# Any escape sequence of the form ESC[...m that is left over after rewriting.
_CSI_RE = re.compile('\033\\[.*?m')
# Opening of a BibTeX entry such as "@article{key,"; group 1 is the key.
_BIBTEX_ENTRY_DEF_RE = re.compile('@.*?{(.*?),')
# A "%" preceded by an even number of backslashes (so not the escaped "\%"),
# together with everything after it. Group 1 keeps those backslashes.
_TEX_COMMENT_RE = re.compile(r'(?<!\\)((?:\\\\)*)%.*', re.DOTALL)


def _count_lines(path):
    """Return the number of lines in the file at *path*."""
    # Binary mode: only the count is needed, so avoid decoding the file with
    # a locale-dependent encoding that might not match its contents.
    with open(path, 'rb') as f:
        return sum(1 for _ in f)


def _git_word_diff(path, revision, context):
    """Return git's coloured word diff of *path* against *revision* as text.

    *context* is passed to ``-U``. Raises ``subprocess.CalledProcessError``
    if git exits with a non-zero status.
    """
    proc = subprocess.run(
        ['git', 'diff', f'-U{context}', '--word-diff', '--color=always',
         revision, '--', path],  # "--" keeps the path from being read as a revision
        check=True, capture_output=True)
    return proc.stdout.decode()


def _strip_tex_comment(text):
    """Return *text* cut off at the first unescaped ``%``."""
    return _TEX_COMMENT_RE.sub(r'\1', text, count=1)


def _braces_balanced(text):
    """Return True if *text* has as many ``{`` as ``}``."""
    return text.count('{') == text.count('}')


def _letter_counts(text):
    """Return a Counter of the ASCII letters occurring in *text*."""
    return Counter(ch for ch in text if ch in string.ascii_letters)


def _render_replacement(match, debug=False):
    """Return the LaTeX replacement for a deletion+addition pair.

    The pair is rendered as plain new text (change suppressed) when either
    side has unbalanced braces, or when the new text introduces no ASCII
    letter and fewer than three ASCII letters of the old text are missing
    from it. Otherwise old and new text are both emitted, coloured.
    """
    old, new = (_strip_tex_comment(part) for part in match.groups())

    if debug:
        print(f'old={repr(old)}, new={repr(new)}', file=sys.stderr)

    # Colouring a fragment with unbalanced braces would put the colour
    # switch inside a half-open group, so just take the new text.
    if not (_braces_balanced(old) and _braces_balanced(new)):
        return new

    old_letters = _letter_counts(old)
    new_letters = _letter_counts(new)
    # Counter subtraction keeps only positive counts (multiset difference).
    dropped = sum((old_letters - new_letters).values())
    introduced = new_letters - old_letters

    if dropped >= 3 or introduced:
        return (r' \color{diffred}' + old
                + r' \color{diffgreen}' + new
                + r' \color{black}')
    return new


def _render_single_change(match, action):
    """Return the LaTeX replacement for a lone addition or deletion.

    *action* is ``'addition'``; any other value is treated as a deletion.
    Fragments with unbalanced braces are not coloured: an addition is kept
    as plain text and a deletion is dropped.
    """
    change = _strip_tex_comment(match.group(1))
    is_addition = action == 'addition'

    if not _braces_balanced(change):
        return change if is_addition else ''

    color = 'diffgreen' if is_addition else 'diffred'
    return rf' \color{{{color}}}' + change + r' \color{black}'


def _bibliography_categories(bib_diff):
    """Return the LaTeX snippet that tags added/removed bibliography entries.

    *bib_diff* is the word diff of the bibliography file. A diff line counts
    as an entry definition when, after stripping whitespace, it consists of
    exactly one addition or deletion marker whose content is an entry
    opening such as ``@article{key,``.
    """
    added, removed = [], []
    for line in bib_diff.splitlines():
        stripped = line.strip()
        for marker_re, keys in ((_ADDITION_RE, added), (_DELETION_RE, removed)):
            marker = marker_re.fullmatch(stripped)
            if marker is None:
                continue
            entry = _BIBTEX_ENTRY_DEF_RE.fullmatch(marker.group(1))
            if entry:
                keys.append(entry.group(1))

    parts = [
        '\\DeclareBibliographyCategory{diff_new_entry}\n',
        '\\DeclareBibliographyCategory{diff_deleted_entry}\n',
        '\\AtEveryBibitem{\\ifcategory{diff_new_entry}{\\color{diffgreen}}{\\ifcategory{diff_deleted_entry}{\\color{diffred}}{\\color{black}}}}\n',
    ]
    if added:
        parts.append('\\addtocategory{diff_new_entry}{' + ','.join(added) + '}\n')
    if removed:
        parts.append('\\addtocategory{diff_deleted_entry}{' + ','.join(removed) + '}\n')
    return ''.join(parts)


@click.command()
@click.argument('texfile')
@click.argument('bibliography')
@click.argument('revision')
def generate_git_tex_diff(texfile, bibliography, revision):
    """Print TEXFILE with its changes since REVISION highlighted in colour.

    Both TEXFILE and BIBLIOGRAPHY are compared against REVISION using
    git's word diff. The resulting LaTeX source is written to standard
    output: added text is coloured green, deleted text red, and added or
    removed bibliography entries are tagged so they are coloured too.
    """
    # Use more context lines than the file has, so the diff output contains
    # the complete file rather than only the neighbourhood of each change.
    tex_context = _count_lines(texfile) + 1
    bib_context = _count_lines(bibliography) + 1

    tex_diff = _git_word_diff(texfile, revision, tex_context)
    bib_diff = _git_word_diff(bibliography, revision, bib_context)

    bibliography_categories = _bibliography_categories(bib_diff)

    # NOTE(review): if git reports no differences for TEXFILE the diff is
    # empty and nothing at all is printed.
    content_started = False
    document_started = False
    for line in tex_diff.splitlines():

        if not content_started:
            # Skip the diff header, up to and including the first "@@" line.
            content_started = '@@' in line
            continue

        line = line.rstrip()
        if document_started:
            # Document body: turn diff markers into coloured LaTeX.

            # Set to a per-line condition (e.g. 'battery' in line) to trace
            # how a particular line is rewritten.
            debug = False
            if debug:
                print('orig:', repr(line), file=sys.stderr)

            # Replacements first, so that the remaining markers are lone
            # additions or deletions.
            line = _REPLACEMENT_RE.sub(
                lambda match: _render_replacement(match, debug), line)
            if debug:
                print('[1]', line, file=sys.stderr)

            line = _ADDITION_RE.sub(
                lambda match: _render_single_change(match, 'addition'), line)
            if debug:
                print('[2]', line, file=sys.stderr)

            line = _DELETION_RE.sub(
                lambda match: _render_single_change(match, 'deletion'), line)
            if debug:
                print('[3]', line, file=sys.stderr)

        else:
            # Preamble: no colouring, simply adopt the new version.
            if '\\begin{document}' in line:
                document_started = True
                # Emitted just before the \begin{document} line.
                # NOTE(review): these macros presumably need xcolor and
                # biblatex to be loaded by the document -- confirm.
                print(bibliography_categories)
                print('\\definecolor{diffgreen}{HTML}{1e8449}')
                print('\\definecolor{diffred}{HTML}{cb4335}')

            line = _ADDITION_RE.sub(r'\1', line)
            line = _DELETION_RE.sub(r'', line)

        # Drop whatever colour escapes are left on the line.
        line = _CSI_RE.sub('', line)
        print(line)

# Run the command only when this file is executed as a script; it is called
# without arguments here.
if __name__ == '__main__':
    generate_git_tex_diff()