import sys
import difflib
import argparse
import StringIO
import pygments
from pygments.lexers import guess_lexer_for_filename
from pygments.lexer import RegexLexer
from pygments.formatters import HtmlFormatter
from pygments.token import *
class DefaultLexer(RegexLexer):
    """
    Fallback lexer used when no language-specific lexer is available.

    Every newline-terminated line is emitted as one plain ``Text`` token.
    """
    name = 'Default'
    aliases = ['default']
    filenames = ['*']

    # One state with one rule: consume a whole line, newline included.
    tokens = {'root': [(r'.*\n', Text)]}
class DiffHtmlFormatter(HtmlFormatter):
    """
    Formats a single source file with pygments and adds diff highlights based on the
    diff details given.
    """
    # NOTE(review): many string literals in this class are empty ('' / "") or
    # are split across physical lines. This looks like HTML markup that was
    # lost from the literals (presumably <span>/<pre>/<table> wrappers) --
    # confirm against the upstream version before relying on this code.

    # isLeft: True when this formatter renders the left-hand ("from") side.
    isLeft = False
    # diffs: sequence of ((left_no, left_line), (right_no, right_line), change)
    # rows, as unpacked by the loops below.
    diffs = None

    # Store the side flag and the diff rows; every other argument is forwarded
    # unchanged to HtmlFormatter.
    def __init__(self, isLeft, diffs, *args, **kwargs):
        self.isLeft = isLeft
        self.diffs = diffs
        super(DiffHtmlFormatter, self).__init__(*args, **kwargs)

    # Override of the formatter's wrap hook: delegates to _wrap_code and
    # ignores `outfile`.
    def wrap(self, source, outfile):
        return self._wrap_code(source)

    # Build one line-number cell per diff row for the side selected by isLeft.
    # A line number that is not an int is treated as "this side has no line
    # for this row".
    def getDiffLineNos(self):
        retlinenos = []
        for idx, ((left_no, left_line),(right_no, right_line),change) in enumerate(self.diffs):
            # Stays None when the row is marked changed but neither number is
            # an int; the caller then formats it with '%s'.
            no = None
            if self.isLeft:
                if change:
                    if isinstance(left_no, int) and isinstance(right_no, int):
                        # line present on both sides
                        no = '' + str(left_no) + ""
                    elif isinstance(left_no, int) and not isinstance(right_no, int):
                        # line present only on the left
                        no = '' + str(left_no) + ""
                    elif not isinstance(left_no, int) and isinstance(right_no, int):
                        # line present only on the right: placeholder cell
                        no = ' '
                else:
                    # unchanged row: plain left line number
                    no = '' + str(left_no) + ""
            else:
                if change:
                    if isinstance(left_no, int) and isinstance(right_no, int):
                        # line present on both sides
                        no = '' + str(right_no) + ""
                    elif isinstance(left_no, int) and not isinstance(right_no, int):
                        # line present only on the left: placeholder cell
                        no = ' '
                    elif not isinstance(left_no, int) and isinstance(right_no, int):
                        # line present only on the right
                        no = '' + str(right_no) + ""
                else:
                    # unchanged row: plain right line number
                    no = '' + str(right_no) + ""
            retlinenos.append(no)
        return retlinenos

    # Generator of (flag, text) pairs: an opening wrapper, one pair per diff
    # row, then a closing wrapper. `source` is the stream of (flag, text)
    # pairs handed to wrap(); it is materialised so rows can be looked up by
    # 1-based line number (source[no-1]).
    def _wrap_code(self, source):
        source = list(source)
        # NOTE(review): this literal is split across two physical lines; the
        # opening tag appears to have been lost.
        yield 0, '
'
        for idx, ((left_no, left_line),(right_no, right_line),change) in enumerate(self.diffs):
            #print idx, ((left_no, left_line),(right_no, right_line),change)
            try:
                if self.isLeft:
                    if change:
                        if isinstance(left_no, int) and isinstance(right_no, int) and left_no <= len(source):
                            # changed on both sides: take the highlighted left line
                            i,t = source[left_no-1]
                            t = '' + t + ""
                        elif isinstance(left_no, int) and not isinstance(right_no, int) and left_no <= len(source):
                            # exists only on the left
                            i,t = source[left_no-1]
                            t = '' + t + ""
                        elif not isinstance(left_no, int) and isinstance(right_no, int):
                            # exists only on the right: use the diff's own text
                            # for the left side as a filler row
                            i,t = 1, left_line
                            t = '' + t + ""
                        else:
                            # No exception is active here, so this bare raise
                            # itself raises an error; the bare except below
                            # swallows it and the row is skipped.
                            raise
                    else:
                        if left_no <= len(source):
                            i,t = source[left_no-1]
                        else:
                            # line number beyond the highlighted source: fall
                            # back to the text carried by the diff row
                            i = 1
                            t = left_line
                else:
                    if change:
                        if isinstance(left_no, int) and isinstance(right_no, int) and right_no <= len(source):
                            # changed on both sides: take the highlighted right line
                            i,t = source[right_no-1]
                            t = '' + t + ""
                        elif isinstance(left_no, int) and not isinstance(right_no, int):
                            # exists only on the left: use the diff's own text
                            # for the right side as a filler row
                            i,t = 1, right_line
                            t = '' + t + ""
                        elif not isinstance(left_no, int) and isinstance(right_no, int) and right_no <= len(source):
                            # exists only on the right
                            i,t = source[right_no-1]
                            t = '' + t + ""
                        else:
                            # same skip-this-row mechanism as on the left side
                            raise
                    else:
                        if right_no <= len(source):
                            i,t = source[right_no-1]
                        else:
                            i = 1
                            t = right_line
                yield i, t
            except:
                # NOTE(review): this bare except hides every error raised in
                # the try body, not only the deliberate bare raise above.
                #print "WARNING! failed to enumerate diffs fully!"
                pass # this is expected sometimes
        # NOTE(review): literal split across two physical lines; the closing
        # tag appears to have been lost.
        yield 0, '\n
'

    # Generator that wraps the formatted code in a two-cell layout: line
    # numbers taken from getDiffLineNos(), then the code itself.
    def _wrap_tablelinenos(self, inner):
        # Buffer the already-formatted code so it can be emitted after the
        # line-number cell.
        dummyoutfile = StringIO.StringIO()
        lncount = 0
        for t, line in inner:
            if t:
                lncount += 1
            dummyoutfile.write(line)
        # NOTE(review): fl, mw, sp, st, la and aln are assigned but never read
        # in the code visible here.
        fl = self.linenostart
        mw = len(str(lncount + fl - 1))
        sp = self.linenospecial
        st = self.linenostep
        la = self.lineanchors
        aln = self.anchorlinenos
        nocls = self.noclasses
        lines = []
        for i in self.getDiffLineNos():
            lines.append('%s' % (i,))
        # NOTE(review): `ls` is built but not referenced by the yields below;
        # presumably it was concatenated inside the lost markup -- confirm.
        ls = ''.join(lines)
        # in case you wonder about the seemingly redundant <div> here: since the
        # content in the other cell also is wrapped in a div, some browsers in
        # some configurations seem to mess up the formatting...
        if nocls:
            yield 0, ('
' % self.cssclass +
                      ' | ')
        else:
            yield 0, ('' % self.cssclass +
                      ' | ')
        yield 0, dummyoutfile.getvalue()
        yield 0, ' | '
class CodeDiff(object):
    """
    Manages a pair of source files and generates a single html diff page comparing
    the contents.

    Typical use: construct, call format(), then write().
    """
    # Pygments style name, also used to pick the matching stylesheet.
    pygmentsStyleOpt = "vs"
    # Asset paths substituted into the HTML template (relative to the page).
    pygmentsCssFile = "./deps/codeformats/%s.css" % pygmentsStyleOpt
    diffCssFile = "./deps/diff.css"
    diffJsFile = "./deps/diff.js"
    resetCssFile = "./deps/reset.css"
    semanticCssFile = "./deps/semantic.min.css"
    semanticJsFile = "./deps/semantic.min.js"
    jqueryJsFile = "./deps/jquery.min.js"
    commentJsFile = "./deps/comment.js"

    def __init__(self, fromfile, tofile, fromtxt=None, totxt=None, name=None):
        """
        Load both sides of the comparison.

        fromfile / tofile -- paths of the "before" / "after" files; each is
                             read only when the matching text is not given.
        fromtxt / totxt   -- optional in-memory contents used instead of
                             reading the files.
        name              -- display name for the page title; also the first
                             choice of file name for lexer detection.
        """
        self.filename = name

        self.fromfile = fromfile
        if fromtxt is None:
            self.fromlines = self._read_lines(fromfile)
        else:
            self.fromlines = self._split_text(fromtxt)
        self.leftcode = "".join(self.fromlines)

        self.tofile = tofile
        if totxt is None:
            self.tolines = self._read_lines(tofile)
        else:
            self.tolines = self._split_text(totxt)
        self.rightcode = "".join(self.tolines)

    @staticmethod
    def _read_lines(path):
        """Return the lines of *path* with universal newline handling."""
        # Python 2 needs 'U' for universal newlines. Python 3 text mode does
        # this by default and rejects 'U' from 3.11 on.
        mode = 'U' if sys.version_info[0] < 3 else 'r'
        with open(path, mode) as handle:
            return handle.readlines()

    @staticmethod
    def _split_text(text):
        """Split *text* into newline-terminated lines, like readlines()."""
        pieces = text.split("\n")
        # Text ending in "\n" leaves an empty last piece; keeping it would
        # add a blank line that the file-reading path does not produce.
        if pieces[-1] == "":
            pieces.pop()
        return [piece + "\n" for piece in pieces]

    def getDiffDetails(self, fromdesc='', todesc='', context=False, numlines=5, tabSize=8):
        """
        Return the side-by-side diff rows for the two line lists.

        fromdesc / todesc -- accepted for compatibility; not used.
        context           -- when true, limit output to `numlines` lines of
                             context around each change.
        tabSize           -- tab stop width used when expanding tabs.

        Side effect: self.fromlines and self.tolines are replaced by their
        tab-expanded versions with the trailing newline removed.

        Each returned row is ((from_no, from_text), (to_no, to_text), changed).
        """
        # change tabs to spaces before it gets more difficult after we insert
        # markup
        def expand_tabs(line):
            # hide real spaces
            line = line.replace(' ','\0')
            # expand tabs into spaces
            line = line.expandtabs(tabSize)
            # replace spaces from expanded tabs back into tab characters
            # (we'll replace them with markup after we do differencing)
            line = line.replace(' ','\t')
            return line.replace('\0',' ').rstrip('\n')

        self.fromlines = [expand_tabs(line) for line in self.fromlines]
        self.tolines = [expand_tabs(line) for line in self.tolines]

        # create diffs iterator which generates side by side from/to data
        context_lines = numlines if context else None

        # NOTE: _mdiff is a private difflib helper (the engine behind
        # difflib.HtmlDiff); it may change between Python versions.
        diffs = difflib._mdiff(self.fromlines, self.tolines, context_lines,
                               linejunk=None, charjunk=difflib.IS_CHARACTER_JUNK)
        return list(diffs)

    def format(self, verbose=False):
        """
        Build the HTML page and store it in self.htmlContents.

        verbose -- when true, print every diff row and lexer fallbacks.

        Reads ./templates/diff_template.html relative to the current working
        directory.
        """
        self.diffs = self.getDiffDetails(self.fromfile, self.tofile)

        if verbose:
            for diff in self.diffs:
                # One parenthesised argument prints the same on Python 2 and 3.
                print("%-6s %-80s %-80s" % (diff[2], diff[0], diff[1]))

        # guess_lexer_for_filename needs a string; fall back to the file
        # paths (or "") when no display name was supplied.
        lexer_filename = self.filename or self.tofile or self.fromfile or ""

        codeContents = []
        for code, isLeft in ((self.leftcode, True), (self.rightcode, False)):
            formatter = DiffHtmlFormatter(isLeft,
                                          self.diffs,
                                          nobackground=False,
                                          linenos=True,
                                          style=self.pygmentsStyleOpt)
            try:
                self.lexer = guess_lexer_for_filename(lexer_filename, code)
            except pygments.util.ClassNotFound:
                if verbose:
                    print("No Lexer Found! Using default...")
                self.lexer = DefaultLexer()

            codeContents.append(pygments.highlight(code, self.lexer, formatter))

        with open("./templates/diff_template.html", 'r') as template_file:
            diffTemplate = template_file.read()

        answers = {
            "html_title": self.filename,
            "reset_css": self.resetCssFile,
            "pygments_css": self.pygmentsCssFile,
            "diff_css": self.diffCssFile,
            "semantic_css": self.semanticCssFile,
            "page_title": self.filename,
            "original_code": codeContents[0],
            "modified_code": codeContents[1],
            "jquery_js": self.jqueryJsFile,
            "semantic_js": self.semanticJsFile,
            "diff_js": self.diffJsFile,
            "comment_js": self.commentJsFile,
        }

        self.htmlContents = diffTemplate % answers

    def write(self, path="index.html"):
        """Write the page built by format() to *path*, encoded as UTF-8."""
        # Binary mode: the payload is already encoded bytes, which a
        # text-mode file rejects on Python 3.
        with open(path, 'wb') as fh:
            fh.write(self.htmlContents.encode('utf8'))
def main(fromfile, tofile, verbose=False):
    """
    Compare two files and write the resulting page to the default output path.

    The "after" file's path doubles as the display name of the comparison.
    """
    comparison = CodeDiff(fromfile, tofile, name=tofile)
    comparison.format(verbose)
    comparison.write()
if __name__ == "__main__":
    # Command-line entry point: parse the two file paths and the verbose
    # flag, then build the diff page.
    #
    # The description uses adjacent string literals. The earlier backslash
    # continuation inside a triple-quoted string fused "application" and
    # "creates" into one word in the help output.
    description = ("Given two source files this application "
                   "creates an html page which highlights the differences "
                   "between the two.")

    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-v', action='store_true', help='show verbose output.')
    parser.add_argument('file1', help='source file to compare ("before" file).')
    parser.add_argument('file2', help='source file to compare ("after" file).')

    args = parser.parse_args()

    main(args.file1, args.file2, args.v)