# controller/fw/tools/ldparser.py
"""pyparsing grammar for a subset of the GNU ld linker script language.

The module exposes the grammar elements as module-level names; ``ldscript``
is the top-level element that parses a whole script.  Run as a script, it
parses the given linker script and prints the resulting parse tree.
"""

import sys

import pyparsing as pp
from pyparsing import pyparsing_common as ppc

LPAREN, RPAREN, LBRACE, RBRACE, LBROK, RBROK, COLON, SEMICOLON, EQUALS, COMMA = map(pp.Suppress, '(){}<>:;=,')

# Suffix letters accepted by the "size int" literal, in increasing magnitude.
_SIZE_SUFFIXES = 'kmgtpe'


def parse_suffix_int(lit):
    """Convert a suffixed integer literal such as ``512K`` to an int.

    GNU ld defines the ``K`` and ``M`` suffixes as factors of 1024 and
    1024*1024, so the multipliers are powers of 1024 (``512K`` == 524288).
    The larger suffixes accepted by the grammar follow the same progression.
    The last character of *lit* is always treated as the suffix; an
    unrecognised suffix scales by 1.
    """
    exponent = 1 + _SIZE_SUFFIXES.find(lit[-1].lower())
    return int(lit[:-1]) * 1024**exponent


si_suffix = pp.oneOf('k m g t p e', caseless=True)

numeric_literal = (
    pp.Regex('0x[0-9a-fA-F]+').setName('hex int').setParseAction(pp.tokenMap(int, 16))
    | (pp.Regex('[0-9]+[kKmMgGtTpPeE]')).setName('size int').setParseAction(pp.tokenMap(parse_suffix_int))
    | pp.Word(pp.nums).setName('int').setParseAction(pp.tokenMap(int))
)
access_def = pp.Regex('[rR]?[wW]?[xX]?').setName('access literal').setParseAction(pp.tokenMap(str.lower))

# MEMORY { NAME (attrs) : ORIGIN = ..., LENGTH = ... }
origin_expr = pp.Suppress(pp.CaselessKeyword('ORIGIN')) + EQUALS + numeric_literal
length_expr = pp.Suppress(pp.CaselessKeyword('LENGTH')) + EQUALS + numeric_literal
mem_expr = pp.Group(ppc.identifier + LPAREN + access_def + RPAREN + COLON + origin_expr + COMMA + length_expr)
mem_contents = pp.ZeroOrMore(mem_expr)

mem_toplevel = pp.CaselessKeyword("MEMORY") + pp.Group(LBRACE + pp.Optional(mem_contents, []) + RBRACE)

# Expressions, assignments and input-section patterns.  ``match_expr`` and
# ``assignment`` are forward-declared because ``funccall`` refers to them.
glob = pp.Word(pp.alphanums + '._*')
match_expr = pp.Forward()
assignment = pp.Forward()
funccall = pp.Group(pp.Word(pp.alphas + '_') + LPAREN
                    + (assignment | numeric_literal | match_expr | glob | ppc.identifier)
                    + RPAREN + pp.Optional(SEMICOLON))
value = numeric_literal | funccall | ppc.identifier | '.'
# '-' added to the operator set; '=' is kept so previously accepted input
# still parses.
formula = (value + pp.oneOf('+ - = * / %') + value) | value
# suppress stray semicolons
assignment << (SEMICOLON | pp.Group((ppc.identifier | '.') + EQUALS + (formula | value) + pp.Optional(SEMICOLON)))
match_expr << (glob + LPAREN + pp.OneOrMore(funccall | glob) + RPAREN)

section_contents = pp.ZeroOrMore(assignment | funccall | match_expr)

# SECTIONS { .name [addr] : { ... } >REGION [AT >REGION] }
section_name = pp.Regex(r'\.[a-zA-Z0-9_.]+')
section_def = pp.Group(section_name + pp.Optional(numeric_literal) + COLON + LBRACE + pp.Group(section_contents)
                       + RBRACE + pp.Optional(RBROK + ppc.identifier + pp.Optional('AT' + RBROK + ppc.identifier)))
sec_contents = pp.ZeroOrMore(section_def | assignment)

sections_toplevel = pp.Group(pp.CaselessKeyword("SECTIONS").suppress() + LBRACE + sec_contents + RBRACE)

toplevel_elements = mem_toplevel | funccall | sections_toplevel | assignment
ldscript = pp.Group(pp.ZeroOrMore(toplevel_elements))
ldscript.ignore(pp.cppStyleComment)

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('linker_script', type=argparse.FileType('r'))
    args = parser.parse_args()

    # argparse opened the file for us; make sure it is closed again.
    with args.linker_script as script:
        print(ldscript.parseFile(script, parseAll=True))
# controller/fw/tools/linkmem.py
"""Trace which object files contribute which symbols to a link.

Runs the linker with ``-Wl,--cref``, inspects the contributing object files
with pyelftools and, when run as a script, prints a Graphviz digraph of the
symbols grouped by object file with edges for relocation references.
"""

import tempfile
import os
from os import path
import sys
import re
import subprocess
from contextlib import contextmanager
from collections import defaultdict

import cxxfilt

from elftools.elf.elffile import ELFFile
from elftools.elf.descriptions import describe_symbol_type
import libarchive


@contextmanager
def chdir(newdir):
    """Context manager: change into *newdir*, restore the old cwd on exit."""
    old_cwd = os.getcwd()
    try:
        os.chdir(newdir)
        yield
    finally:
        os.chdir(old_cwd)


def _symbol_size(symtab, index_by_name, name):
    """Return ``st_size`` of *name* in *symtab*, or None if it is not listed.

    *index_by_name* maps demangled, space-stripped names to symbol indices.
    """
    idx = index_by_name.get(name.replace(' ', ''))
    if idx is None:
        return None
    return symtab.get_symbol(idx)['st_size']


def _relocation_targets(elf, symtab, section_index):
    """Return the names of all symbols referenced by a relocation section."""
    sec = elf.get_section(section_index)
    return [symtab.get_symbol(reloc['r_info_sym']).name for reloc in sec.iter_relocations()]


def trace_source_files(linker, cmdline, trace_sections=()):
    """Link with ``--cref`` and collect symbol sizes and references.

    :param linker: linker (driver) executable to run.
    :param cmdline: further command line arguments for the linker.
    :param trace_sections: section names (e.g. ``.isr_vector``) whose
        relocations are recorded under the section name with ``.`` replaced
        by ``_``.
    :returns: ``(syms, refs, syms_out)`` where ``syms`` maps a symbol to
        ``(object file, size or None)``, ``refs`` maps a symbol (or mangled
        section name) to the set of symbol names it references, and
        ``syms_out`` is the set of FUNC symbols in the linked output.
    :raises ValueError: if the linker output has no cross reference table.
    :raises subprocess.CalledProcessError: if the linker or ``ar`` fails.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        out_path = path.join(tempdir, 'output.elf')
        output = subprocess.check_output([linker, '-o', out_path, '-Wl,--cref', *cmdline])
        lines = [line.strip() for line in output.decode().splitlines()]
        # FIXME also find isr vector table references

        try:
            table_start = lines.index('Cross Reference Table')
        except ValueError:
            raise ValueError('linker output contains no cross reference table') from None

        # The first three lines are skipped before rows are read.  Rows with
        # fewer than two fields (blank lines, or lines holding only a file
        # name) are ignored.
        defs = {}
        for line in lines[table_start + 3:]:
            fields = line.split()
            if len(fields) < 2:
                continue
            *left, right = fields
            defs[' '.join(left)] = right

        refs = defaultdict(set)
        syms = {}
        for sym, obj in defs.items():
            # "archive.a(member.o)" or a plain object file name
            fn, _, member = re.match(r'^([^()]+)(\((.+)\))?$', obj).groups()
            fn = path.abspath(fn)

            if member:
                subprocess.check_call(['ar', 'x', '--output', tempdir, fn, member])
                fn = path.join(tempdir, member)

            with open(fn, 'rb') as f:
                elf = ELFFile(f)
                symtab = elf.get_section_by_name('.symtab')
                index_by_name = {cxxfilt.demangle(nsym.name).replace(' ', ''): i
                                 for i, nsym in enumerate(symtab.iter_symbols())}

                syms[sym] = fn, _symbol_size(symtab, index_by_name, sym)

                sec_map = {sec.name: i for i, sec in enumerate(elf.iter_sections())}

                # Relocation section belonging to this symbol's own section.
                # The symbol is escaped and the match anchored at both ends
                # so that e.g. "foo" does not pick up ".rel.text.foobar".
                reloc_re = re.compile(rf'\.rel\..*\.{re.escape(sym)}')
                matches = [i for name, i in sec_map.items() if reloc_re.fullmatch(name)]
                referenced = set()
                if matches:
                    for target in _relocation_targets(elf, symtab, matches[0]):
                        referenced.add(target)
                        if target not in defs:
                            syms[target] = fn, _symbol_size(symtab, index_by_name, target)
                refs[sym] = referenced

                for tsec in trace_sections:
                    idx = sec_map.get(f'.rel{tsec}')
                    if idx is not None:
                        refs[tsec.replace('.', '_')] |= set(_relocation_targets(elf, symtab, idx))

        syms_out = set()
        with open(out_path, 'rb') as f:
            elf = ELFFile(f)
            symtab = elf.get_section_by_name('.symtab')
            for sym in symtab.iter_symbols():
                if describe_symbol_type(sym['st_info']['type']) == 'FUNC':
                    syms_out.add(sym.name)

        return syms, refs, syms_out


@contextmanager
def wrap(leader='', print=print, left='{', right='}'):
    """Print ``leader left``, yield a print function, then print ``right``.

    The yielded function forwards to *print* with a leading ``' '`` argument
    so that nested output is visibly offset.
    """
    print(leader, left)
    yield lambda *args, **kwargs: print(' ', *args, **kwargs)
    print(right)


def mangle(name):
    """Replace every character outside ``[a-zA-Z0-9_]`` in *name* by ``_``."""
    return re.sub('[^a-zA-Z0-9_]', '_', name)


def main():
    """Command line entry point: print the reference graph in DOT format."""
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--trace-sections', type=str, action='append', default=[])
    parser.add_argument('linker_binary')
    parser.add_argument('linker_args', nargs=argparse.REMAINDER)
    args = parser.parse_args()

    trace_sections = args.trace_sections
    trace_sections_mangled = {sec.replace('.', '_') for sec in trace_sections}
    syms, refs, syms_out = trace_source_files(args.linker_binary, args.linker_args, trace_sections)

    # Group symbols by the object file they come from and total their sizes.
    clusters = defaultdict(list)
    obj_size = defaultdict(int)
    for sym, (obj, size) in syms.items():
        clusters[obj].append((sym, size))
        if size is not None:
            obj_size[obj] += size

    with wrap('digraph G', print) as lvl1print:
        print('rankdir=LR;')
        print()

        for i, (obj, members) in enumerate(clusters.items()):
            with wrap(f'subgraph cluster_{i}', lvl1print) as lvl2print:
                lvl2print(f'label = "{obj} <{obj_size[obj]}>";')
                lvl2print()
                for sym, size in members:
                    if sym in syms_out:
                        lvl2print(f'{mangle(sym)}[label = "{sym} <{size}>"];')
            lvl1print()

        for start, ends in refs.items():
            for end in ends:
                if end and (start in syms_out or start in trace_sections_mangled) and end in syms_out:
                    lvl1print(f'{mangle(start)} -> {mangle(end)};')

        for sec in trace_sections:
            lvl1print(f'{sec.replace(".", "_")} [label = "section {sec}"];')


if __name__ == '__main__':
    main()
#!/usr/bin/env python3
# controller/fw/tools/linksize.py
"""Work-in-progress helpers for reasoning about a linker command line.

Only the splitting of the input files into ``--start-group``/``--end-group``
groups is implemented; symbol loading and resolution are still stubs.
"""


def _with_wl_prefix(*opts):
    """Return each option both bare and with the ``-Wl,`` prefix."""
    return [prefix + opt for opt in opts for prefix in ('', '-Wl,')]


START_GROUP = _with_wl_prefix('-(', '--start-group')
END_GROUP = _with_wl_prefix('-)', '--end-group')
GROUP_OPTS = [*START_GROUP, *END_GROUP]


def parse_linker_script(data):
    """Placeholder; not implemented yet."""
    pass


def load_symbols(fn):
    """Placeholder for reading the symbols of object file *fn*.

    :raises NotImplementedError: always; the loader has not been written.
    """
    raise NotImplementedError('load_symbols() is not implemented yet')


def link(groups):
    """Skeleton of the symbol resolution loop over ``(is_group, files)`` pairs.

    Each entry is scanned once.  The loop is set up to rescan a group while a
    pass finds something new, but nothing sets ``found_something`` yet, so
    every entry is currently visited exactly once.

    :raises NotImplementedError: via :func:`load_symbols` as soon as any
        file has to be inspected.
    """
    defined_symbols = {}
    for group, files in groups:
        while True:
            found_something = False

            for fn in files:
                for symbol in load_symbols(fn):
                    if symbol in defined_symbols:
                        # TODO: the original left this branch empty; the
                        # handling of already-defined symbols (and the
                        # bookkeeping of undefined ones) is still missing.
                        pass

            if not group or not found_something:
                break


def input_file_iter(input_files):
    """Split linker inputs into runs of grouped and ungrouped files.

    *input_files* is a sequence of file names interspersed with the group
    options listed in ``START_GROUP``/``END_GROUP``.  Yields
    ``(is_group, files)`` tuples in command line order; empty runs are not
    yielded.

    :raises ValueError: on nested or unbalanced group options.
    """
    in_group = False
    files = []
    for arg in input_files:
        if arg in START_GROUP:
            if in_group:
                raise ValueError('nested -Wl,--start-group')
            if files:
                yield False, files
            in_group, files = True, []

        elif arg in END_GROUP:
            if not in_group:
                raise ValueError('missing -Wl,--start-group')
            if files:
                yield True, files
            in_group, files = False, []

        else:
            files.append(arg)

    if in_group:
        raise ValueError('missing -Wl,--end-group')
    if files:
        yield False, files


if __name__ == '__main__':

    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('-T', '--script', type=str, help='Linker script to use')
    parser.add_argument('-o', '--output', type=str, help='Output file to produce')
    args, rest = parser.parse_known_intermixed_args()
    print(rest)

    # Everything that is not an option is an input file; group options are
    # kept so that input_file_iter() can see the group boundaries.
    # TODO: feed input_file_iter(input_files) into link() once it is complete.
    input_files = [arg for arg in rest if not arg.startswith('-') or arg in GROUP_OPTS]
#!/usr/bin/env python3
# controller/fw/tools/linktracer.py
"""Run a link with a map file and extract objects, sections and symbols."""

import logging
import re
import subprocess
import tempfile
import pprint

ARCHIVE_RE = r'([^(]*)(\([^)]*\))?'

# "[ ][name] <address> [<size>] [rest]" as found in the memory map
_MAP_LINE_RE = re.compile(r'^( ?)([^ ]+)? +(0x[0-9a-z]+) +(0x[0-9a-z]+)?(.*)?$')
# a name alone on its line; its address/size follow on the next line
_CONT_LINE_RE = re.compile(r'^( ?)([^ ]+)?$')
# plain symbol names, as opposed to assignments such as ". = ALIGN (0x4)"
_SYMBOL_RE = re.compile('^[a-zA-Z_0-9<>():*]+$')

logger = logging.getLogger(__name__)


def _parse_memory_map(lines):
    """Parse the lines following the 'Linker script and memory map' heading.

    Returns ``(objects, sections, symbols)``: the ``LOAD``ed files, a dict of
    unindented section name -> size, and a dict of symbol ->
    ``(size, section)``.  A symbol's size is the distance to the next symbol
    or, for the last symbol of an entry, to the end of that entry.  Parsing
    stops at the ``OUTPUT(...)`` line.
    """
    objects, sections, symbols = [], {}, {}
    current_section = None
    object_end = None  # end address of the current address+size entry
    pending = None     # (symbol, address) still waiting for its size
    cont = None        # (indent, name) of a name wrapped onto its own line

    def close_pending(end):
        nonlocal pending
        if pending is not None and end is not None:
            name, start = pending
            symbols[name] = (end - start, current_section)
            logger.debug('Symbol: %s 0x%x @%s', name, end - start, current_section)
        pending = None

    for line in lines:
        logger.debug('Processing >%s', line)
        if line.startswith('LOAD'):
            objects.append(line.partition(' ')[2].strip())
            continue

        if line.startswith('OUTPUT'):
            break

        m = _MAP_LINE_RE.match(line)
        if m is None:
            m = _CONT_LINE_RE.match(line)
            cont = m.groups() if m else None
            close_pending(object_end)
            continue

        indent, sec, offx, size, rest = m.groups()
        if sec is None and cont is not None:
            # name was on the previous line; its indentation decides the level
            indent, sec = cont
        cont = None

        # Convert before use: the regex groups are strings.
        offx = int(offx, 16)
        size = int(size, 16) if size is not None else None
        rest = (rest or '').strip()

        if indent == '':
            # unindented name: a new section starts
            close_pending(object_end)
            logger.debug('Section: %s %s', sec, size)
            current_section, object_end = sec, None
            sections[sec] = size
            continue

        if size is not None:
            # indented entry carrying both address and size
            close_pending(object_end)
            object_end = offx + size
            continue

        # Remaining lines carry an address only; keep plain symbol names.
        if not _SYMBOL_RE.match(rest):
            continue
        close_pending(offx)
        pending = (rest, offx)

    close_pending(object_end)
    return objects, sections, symbols


def _parse_cross_reference(lines):
    """Map each symbol of the cross reference table to its first listed file.

    For ``archive(member)`` entries only the archive name is kept.  Lines
    with a single field (additional files for the previous symbol) and the
    ``Symbol File`` header are skipped.
    """
    used_defs = {}
    for line in lines:
        *left, right = line.split()
        if not left or (left == ['Symbol'] and right == 'File'):
            continue
        archive, _member = re.match(ARCHIVE_RE, right).groups()
        used_defs[' '.join(left)] = archive
    return used_defs


def parse_map_lines(lines):
    """Parse the non-empty, right-stripped lines of a linker map file.

    :returns: ``(objects, sections, symbols, used_defs)``; see
        :func:`_parse_memory_map` and :func:`_parse_cross_reference`.
        Parts whose heading is missing from *lines* come back empty.
    """
    try:
        start = lines.index('Linker script and memory map') + 1
    except ValueError:
        start = len(lines)
    objects, sections, symbols = _parse_memory_map(lines[start:])

    # map which symbol was pulled from which object in the end
    try:
        cref_start = lines.index('Cross Reference Table', start) + 1
    except ValueError:
        cref_start = len(lines)
    used_defs = _parse_cross_reference(lines[cref_start:])

    return objects, sections, symbols, used_defs


def trace_source_files(linker, cmdline):
    """Run *linker* with a temporary map file and parse the map.

    :param linker: linker (driver) executable to run.
    :param cmdline: further command line arguments for the linker.
    :returns: the tuple described in :func:`parse_map_lines`.
    :raises subprocess.CalledProcessError: if the linker fails.
    """
    with tempfile.NamedTemporaryFile() as mapfile:
        subprocess.check_output([linker, f'-Wl,--Map={mapfile.name}', *cmdline])
        lines = [line.rstrip() for line in mapfile.read().decode().splitlines() if line.strip()]

    return parse_map_lines(lines)


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('linker_binary')
    parser.add_argument('linker_args', nargs=argparse.REMAINDER)
    args = parser.parse_args()

    source_files = trace_source_files(args.linker_binary, args.linker_args)
    pprint.pprint(source_files)
# controller/fw/tools/mapparse.py
"""Parser for GCC/ld map files that totals section contributions per file."""

import re
from collections import defaultdict, deque, namedtuple

Section = namedtuple('Section', ['name', 'offset', 'objects'])
ObjectEntry = namedtuple('ObjectEntry', ['filename', 'object', 'offset', 'size'])
FileEntry = namedtuple('FileEntry', ['section', 'object', 'offset', 'length'])


class Memory:
    """One memory region of the map file and everything placed in it."""

    def __init__(self, name, origin, length, attrs=''):
        self.name, self.origin, self.length, self.attrs = name, origin, length, attrs
        self.sections = {}               # top-level section name -> Section
        self.files = defaultdict(list)   # file name -> [FileEntry]
        self.totals = defaultdict(int)   # file name -> summed length

    def add_toplevel(self, name, offx, length):
        """Register top-level section *name* starting at *offx*.

        *length* is accepted for symmetry with :meth:`add_obj` but not
        stored, since ``Section`` has no field for it.
        """
        self.sections[name] = Section(name, offx, [])

    def add_obj(self, name, offx, length, fn, obj):
        """Record a second-level entry *name* contributed by file *fn*.

        The entry is attached to the top-level section named by the first
        dotted component of *name* (``.text.foo`` -> ``.text``) if that
        section is known to this memory.  *obj* is the archive member name
        or None.
        """
        base_section, _sep, _subsec = name[1:].partition('.')
        sec = self.sections.get('.' + base_section)
        if sec is not None:
            sec.objects.append(ObjectEntry(fn, obj, offx, length))
        self.files[fn].append(FileEntry(sec, obj, offx, length))
        self.totals[fn] += length


class MapFile:
    """Parsed map file; index it by memory name or by address."""

    _TOPLEVEL_RE = re.compile(r'^(\.[a-zA-Z0-9_.]+)\s+(0x[0-9a-fA-F]+)\s+(0x[0-9a-fA-F]+)')
    _SECONDLEVEL_RE = re.compile(r'^ (\.[a-zA-Z0-9_.]+)\s+(0x[0-9a-fA-F]+)\s+(0x[0-9a-fA-F]+)\s+(.*)$')
    # A section name alone on its line; address and size follow on the next.
    # Lines come from str.splitlines(), so they carry no trailing newline.
    _WRAPPED_NAME_RE = re.compile(r'^ (\.[a-zA-Z0-9_.]+)\s*$')
    _FILELIKE_RE = re.compile(r'^(/?[^()]*\.[a-zA-Z0-9_-]+)(\(.*\))?')

    def __init__(self, s):
        # deque: lines are consumed from the front while parsing
        self._lines = deque(s.splitlines())
        self.memcfg = {}
        self.defaultmem = Memory('default', 0, 0xffffffffffffffff)
        self._parse()

    def __getitem__(self, offx_or_name):
        ''' Lookup a memory area by name or address '''
        if offx_or_name in self.memcfg:
            return self.memcfg[offx_or_name]

        if isinstance(offx_or_name, int):
            for mem in self.memcfg.values():
                if mem.origin <= offx_or_name < mem.origin + mem.length:
                    return mem
            # address outside every configured region
            return self.defaultmem

        raise ValueError('Invalid argument type for indexing')

    def _skip(self, regex):
        """Consume lines up to and including the first one matching *regex*."""
        matcher = re.compile(regex)
        for l in self:
            if matcher.match(l):
                break

    def __iter__(self):
        # Destructive on purpose: successive loops resume where the
        # previous one stopped.
        while self._lines:
            yield self._lines.popleft()

    def _parse(self):
        self._skip('^Memory Configuration')

        # Parse memory segmentation info
        self._skip('^Name')
        for l in self:
            if not l.strip():
                break
            name, origin, length, *attrs = l.split()
            if not name.startswith('*'):
                self.memcfg[name] = Memory(name, int(origin, 16), int(length, 16), attrs[0] if attrs else '')

        # Parse section information
        wrapped_name = None
        for l in self:
            # Toplevel section
            match = self._TOPLEVEL_RE.match(l)
            if match:
                name, offx, length = match.groups()
                offx, length = int(offx, 16), int(length, 16)
                self[offx].add_toplevel(name, offx, length)
                continue

            match = self._WRAPPED_NAME_RE.match(l)
            if match:
                wrapped_name, = match.groups()
                continue

            if wrapped_name:
                # re-join the wrapped name with its address/size line
                l = ' {} {}'.format(wrapped_name, l)
                wrapped_name = None

            # Second-level section
            match = self._SECONDLEVEL_RE.match(l)
            if not match:
                continue
            name, offx, length, misc = match.groups()
            match = self._FILELIKE_RE.match(misc)
            if match:
                fn, obj = match.groups()
                obj = obj.strip('()') if obj else None
                offx, length = int(offx, 16), int(length, 16)
                self[offx].add_obj(name, offx, length, fn, obj)


def main():
    """Command line entry point: print per-file totals for each memory."""
    import argparse
    parser = argparse.ArgumentParser(description='Parser GCC map file')
    parser.add_argument('mapfile', type=argparse.FileType('r'), help='The GCC .map file to parse')
    parser.add_argument('-m', '--memory', type=str, help='The memory segments to print, comma-separated')
    args = parser.parse_args()
    with args.mapfile as f:
        mf = MapFile(f.read())

    mems = args.memory.split(',') if args.memory else list(mf.memcfg)
    unknown = [name for name in mems if name not in mf.memcfg]
    if unknown:
        parser.error('unknown memory segment(s): ' + ', '.join(unknown))

    for name in mems:
        mem = mf.memcfg[name]
        print('Symbols by file for memory', name)
        # largest contributors first
        for tot, fn in sorted(((tot, fn) for fn, tot in mem.totals.items()), reverse=True):
            print(' {:>8} {}'.format(tot, fn))
            entries = sorted(mem.files[fn], key=lambda e: (e.length, e.offset), reverse=True)
            for entry in entries:
                print(' {:>8} {:>#08x} {}'.format(entry.length, entry.offset, entry.object))


if __name__ == '__main__':
    main()