+import sys
+import pyparsing as pp
+from pyparsing import pyparsing_common as ppc
def parse_suffix_int(lit):
    """Convert a size literal with a one-letter scale suffix to an int.

    ``lit`` is a decimal number directly followed by one of the letters
    k, m, g, t, p or e (either case), e.g. ``'512K'``.  GNU ld scales ``K``
    by 1024 and ``M`` by 1024*1024; the remaining letters continue that
    series in powers of 1024.

    Raises:
        ValueError: if the last character is not a known suffix or the
            leading part is not a decimal integer.
    """
    exponent = 'kmgtpe'.find(lit[-1:].lower()) + 1
    if not lit or exponent == 0:
        raise ValueError(f'unknown size suffix in literal {lit!r}')
    return int(lit[:-1]) * 1024 ** exponent
# pyparsing grammar for a subset of the GNU ld linker script language.

# Punctuation tokens are suppressed so they do not show up in parse results.
# NOTE(review): these names are used throughout the grammar but were not
# defined anywhere in the visible file; Suppress is assumed -- confirm.
LPAREN, RPAREN, LBRACE, RBRACE, RBROK, COLON, SEMICOLON, EQUALS, COMMA = map(pp.Suppress, '(){}>:;=,')

si_suffix = pp.oneOf('k m g t p e', caseless=True)

# Integer literals: hexadecimal, decimal with a size suffix, or plain decimal.
# Order matters: the more specific alternatives have to be tried first.
numeric_literal = (
    pp.Regex('0x[0-9a-fA-F]+').setName('hex int').setParseAction(pp.tokenMap(int, 16))
    | pp.Regex('[0-9]+[kKmMgGtTpPeE]').setName('size int').setParseAction(pp.tokenMap(parse_suffix_int))
    | pp.Word(pp.nums).setName('int').setParseAction(pp.tokenMap(int))
)

# MEMORY region attributes.  ld accepts the letters R, W, X, A, I, L and '!'
# in any order (e.g. the common "(xrw)"), so no fixed ordering is enforced.
access_def = pp.Regex('[rRwWxXaAiIlL!]*').setName('access literal').setParseAction(pp.tokenMap(str.lower))

# MEMORY { NAME (attrs) : ORIGIN = <int>, LENGTH = <int> ... }
origin_expr = pp.Suppress(pp.CaselessKeyword('ORIGIN')) + EQUALS + numeric_literal
length_expr = pp.Suppress(pp.CaselessKeyword('LENGTH')) + EQUALS + numeric_literal
mem_expr = pp.Group(ppc.identifier + LPAREN + access_def + RPAREN + COLON + origin_expr + COMMA + length_expr)
mem_contents = pp.ZeroOrMore(mem_expr)
mem_toplevel = pp.CaselessKeyword("MEMORY") + pp.Group(LBRACE + pp.Optional(mem_contents, []) + RBRACE)

# Input section patterns, function-style commands and assignments.
glob = pp.Word(pp.alphanums + '._*')
match_expr = pp.Forward()
assignment = pp.Forward()
funccall = pp.Group(
    pp.Word(pp.alphas + '_') + LPAREN
    + (assignment | numeric_literal | match_expr | glob | ppc.identifier)
    + RPAREN + pp.Optional(SEMICOLON))
value = numeric_literal | funccall | ppc.identifier | '.'
# '-' was missing from the operator list; '=' is kept for compatibility.
formula = (value + pp.oneOf('+ - = * / %') + value) | value
# suppress stray semicolons
assignment << (SEMICOLON | pp.Group((ppc.identifier | '.') + EQUALS + (formula | value) + pp.Optional(SEMICOLON)))
match_expr << (glob + LPAREN + pp.OneOrMore(funccall | glob) + RPAREN)

# SECTIONS { .name [addr] : { contents } [>REGION [AT>REGION]] ... }
section_contents = pp.ZeroOrMore(assignment | funccall | match_expr)
section_name = pp.Regex(r'\.[a-zA-Z0-9_.]+')
section_def = pp.Group(
    section_name + pp.Optional(numeric_literal) + COLON + LBRACE + pp.Group(section_contents)
    + RBRACE + pp.Optional(RBROK + ppc.identifier + pp.Optional('AT' + RBROK + ppc.identifier)))
sec_contents = pp.ZeroOrMore(section_def | assignment)
sections_toplevel = pp.Group(pp.CaselessKeyword("SECTIONS").suppress() + LBRACE + sec_contents + RBRACE)

# A whole script is any sequence of the top-level constructs above.
toplevel_elements = mem_toplevel | funccall | sections_toplevel | assignment
ldscript = pp.Group(pp.ZeroOrMore(toplevel_elements))
if __name__ == '__main__':
    # Command-line entry point: parse the given linker script with the
    # `ldscript` grammar and print the resulting parse tree.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('linker_script', type=argparse.FileType('r'))
    args = parser.parse_args()

    # Manual test snippets kept for reference:
    #print(mem_expr.parseString('FLASH (rx) : ORIGIN = 0x0800000, LENGTH = 512K', parseAll=True))
    # print(ldscript.parseString('''
    # /* Entry Point */
    # ENTRY(Reset_Handler)
    #
    # /* Highest address of the user mode stack */
    # _estack = 0x20020000; /* end of RAM */
    # /* Generate a link error if heap and stack don't fit into RAM */
    # _Min_Heap_Size = 0x200;; /* required amount of heap */
    # _Min_Stack_Size = 0x400;; /* required amount of stack */
    # ''', parseAll=True))

    # argparse opened the file for us; make sure it is closed again.
    with args.linker_script as script_file:
        print(ldscript.parseFile(script_file, parseAll=True))
+ #print(funccall.parseString('KEEP(*(.isr_vector))'))
+ #print(section_contents.parseString('''
+ # . = ALIGN(4);
+ # KEEP(*(.isr_vector)) /* Startup code */
+ # . = ALIGN(4);
+ # ''', parseAll=True))
+ #print(section_def.parseString('''
+ # .text :
+ # {
+ # . = ALIGN(4);
+ # *(.text) /* .text sections (code) */
+ # *(.text*) /* .text* sections (code) */
+ # *(.glue_7) /* glue arm to thumb code */
+ # *(.glue_7t) /* glue thumb to arm code */
+ # *(.eh_frame)
+ #
+ # KEEP (*(.init))
+ # KEEP (*(.fini))
+ #
+ # . = ALIGN(4);
+ # _etext = .; /* define a global symbols at end of code */
+ # } >FLASH
+ # ''', parseAll=True))
+ #print(section_def.parseString('.ARM.extab : { *(.ARM.extab* .gnu.linkonce.armextab.*) } >FLASH', parseAll=True))
+ #print(assignment.parseString('__preinit_array_start = .', parseAll=True))
+ #print(assignment.parseString('a = 23', parseAll=True))
+ #print(funccall.parseString('foo (a=23)', parseAll=True))
+ #print(funccall.parseString('PROVIDE_HIDDEN (__preinit_array_start = .);', parseAll=True))
+ #print(section_def.parseString('''
+ # .preinit_array :
+ # {
+ # PROVIDE_HIDDEN (__preinit_array_start = .);
+ # KEEP (*(.preinit_array*))
+ # PROVIDE_HIDDEN (__preinit_array_end = .);
+ # } >FLASH''', parseAll=True))
+ #print(match_expr.parseString('*(SORT(.init_array.*))', parseAll=True))
+ #print(funccall.parseString('KEEP (*(SORT(.init_array.*)))', parseAll=True))
+ #print(section_def.parseString('''
+ # .init_array :
+ # {
+ # PROVIDE_HIDDEN (__init_array_start = .);
+ # KEEP (*(SORT(.init_array.*)))
+ # KEEP (*(.init_array*))
+ # PROVIDE_HIDDEN (__init_array_end = .);
+ # } >FLASH
+ # ''', parseAll=True))
+ #print(match_expr.parseString('*(.ARM.extab* .gnu.linkonce.armextab.*)', parseAll=True))
+ #print(formula.parseString('. + _Min_Heap_Size', parseAll=True))
+ #print(assignment.parseString('. = . + _Min_Heap_Size;', parseAll=True))
+ #print(sections_toplevel.parseString('''
+ # {
+ # .ARMattributes : { }
+ # }
+ # ''', parseAll=True))
+ #sys.exit(0)
+import tempfile
+import os
+from os import path
+import sys
+import re
+import subprocess
+from contextlib import contextmanager
+from collections import defaultdict
+import cxxfilt
+from elftools.elf.elffile import ELFFile
+from elftools.elf.descriptions import describe_symbol_type
+import libarchive
@contextmanager
def chdir(newdir):
    """Run the body of a ``with`` statement with ``newdir`` as the cwd.

    The previous working directory is restored when the block exits, whether
    normally or through an exception.
    """
    old_cwd = os.getcwd()
    try:
        os.chdir(newdir)
        yield
    finally:
        os.chdir(old_cwd)
def trace_source_files(linker, cmdline, trace_sections=()):
    """Link with ``--cref`` and collect symbol definitions and references.

    Runs ``linker`` with ``cmdline`` plus ``-Wl,--cref`` into a temporary
    output file, reads the "Cross Reference Table" from its stdout and then
    opens every defining object file (extracting archive members with ``ar``)
    to look up symbol sizes and the relocations of matching ``.rel.*``
    sections.

    Args:
        linker: linker driver executable to run.
        cmdline: additional command-line arguments for the driver.
        trace_sections: names of extra sections (e.g. ``'.isr_vector'``) whose
            ``.rel<name>`` relocations are collected as well.

    Returns:
        ``(syms, refs, syms_out)`` where ``syms`` maps a symbol name to
        ``(object file path, size or None)``, ``refs`` maps a symbol name (or
        a traced section name with ``.`` replaced by ``_``) to the set of
        symbol names its relocations refer to, and ``syms_out`` is the set of
        ``FUNC`` symbols present in the linked output.  Paths of extracted
        archive members point into a temporary directory that is removed
        before this function returns.

    NOTE(review): several operands of this function were missing from the
    source text (symbol ``.name`` accesses and the key of ``sec_map``) and
    its nesting was flattened; both are reconstructed from context -- confirm.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        out_path = path.join(tempdir, 'output.elf')
        output = subprocess.check_output([linker, '-o', out_path, '-Wl,--cref', *cmdline])
        lines = [line.strip() for line in output.decode().splitlines()]
        # FIXME also find isr vector table references

        # The table starts three lines after its heading.  Each entry is
        # "<symbol> <defining file>"; lines with a single field are skipped.
        defs = {}
        for line in lines[lines.index('Cross Reference Table') + 3:]:
            fields = line.split()
            if len(fields) >= 2:
                defs[' '.join(fields[:-1])] = fields[-1]

        refs = defaultdict(set)
        syms = {}
        for sym, obj in defs.items():
            # "archive.a(member.o)" or a plain object file name.
            fn, _, member = re.match(r'^([^()]+)(\((.+)\))?$', obj).groups()
            fn = path.abspath(fn)
            if member:
                subprocess.check_call(['ar', 'x', '--output', tempdir, fn, member])
                fn = path.join(tempdir, member)

            with open(fn, 'rb') as f:
                elf = ELFFile(f)
                symtab = elf.get_section_by_name('.symtab')
                # Demangled name with spaces removed -> symbol table index.
                symtab_demangled = {
                    cxxfilt.demangle(nsym.name).replace(' ', ''): i
                    for i, nsym in enumerate(symtab.iter_symbols())}

                def lookup_size(name):
                    """Return st_size of ``name`` in this object, or None."""
                    index = symtab_demangled.get(name.replace(' ', ''))
                    if index is None:
                        return None
                    return symtab.get_symbol(index)['st_size']

                syms[sym] = fn, lookup_size(sym)

                sec_map = {sec.name: i for i, sec in enumerate(elf.iter_sections())}

                # Symbols referenced from this symbol's own relocation section.
                targets = set()
                matches = [i for name, i in sec_map.items() if re.match(rf'\.rel\..*\.{sym}', name)]
                if matches:
                    for reloc in elf.get_section(matches[0]).iter_relocations():
                        refsym = symtab.get_symbol(reloc['r_info_sym'])
                        targets.add(refsym.name)
                        if refsym.name not in defs:
                            syms[refsym.name] = fn, lookup_size(refsym.name)
                refs[sym] = targets

                # Symbols referenced from the explicitly traced sections.
                for tsec in trace_sections:
                    targets = set()
                    matches = [i for name, i in sec_map.items() if name == f'.rel{tsec}']
                    if matches:
                        for reloc in elf.get_section(matches[0]).iter_relocations():
                            refsym = symtab.get_symbol(reloc['r_info_sym'])
                            targets.add(refsym.name)
                    refs[tsec.replace('.', '_')] |= targets

        # Function symbols that made it into the final binary.
        syms_out = set()
        with open(out_path, 'rb') as f:
            elf = ELFFile(f)
            symtab = elf.get_section_by_name('.symtab')
            for sym in symtab.iter_symbols():
                if describe_symbol_type(sym['st_info']['type']) == 'FUNC':
                    syms_out.add(sym.name)

        return syms, refs, syms_out
@contextmanager
def wrap(leader='', print=print, left='{', right='}'):
    """Context manager that emits a delimited block through ``print``.

    On entry ``leader`` and ``left`` are printed.  The value bound by ``as``
    is a print-like callable that forwards to ``print`` with ``' '`` as an
    extra first argument.  When the body finishes normally, ``right`` is
    printed.
    """
    print(leader, left)
    yield lambda *args, **kwargs: print(' ', *args, **kwargs)
    print(right)
def mangle(name):
    """Return ``name`` with every character outside ``[a-zA-Z0-9_]`` replaced by ``_``."""
    return re.sub(r'[^a-zA-Z0-9_]', '_', name)
if __name__ == '__main__':
    # Command-line entry point: trace a link and print a Graphviz digraph on
    # stdout with one cluster per object file and one edge per reference.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--trace-sections', type=str, action='append', default=[])
    parser.add_argument('linker_binary')
    parser.add_argument('linker_args', nargs=argparse.REMAINDER)
    args = parser.parse_args()

    trace_sections = args.trace_sections
    trace_sections_mangled = {sec.replace('.', '_') for sec in trace_sections}
    syms, refs, syms_out = trace_source_files(args.linker_binary, args.linker_args, trace_sections)

    # Group symbols by defining object and add up the known sizes per object.
    clusters = defaultdict(list)
    obj_size = defaultdict(int)
    for sym, (obj, size) in syms.items():
        clusters[obj].append((sym, size))
        if size is not None:
            obj_size[obj] += size

    with wrap('digraph G', print) as lvl1print:
        print('rankdir=LR;')
        print()

        # One subgraph per object file; only symbols present in the output
        # binary get a node.
        for i, (obj, obj_syms) in enumerate(clusters.items()):
            with wrap(f'subgraph cluster_{i}', lvl1print) as lvl2print:
                lvl2print(f'label = "{obj} <{obj_size[obj]}>";')
                lvl2print()
                for sym, size in obj_syms:
                    if sym in syms_out:
                        lvl2print(f'{mangle(sym)}[label = "{sym} <{size}>"];')
            lvl1print()

        # Edges start at an output symbol or a traced section and always end
        # at an output symbol.
        for start, ends in refs.items():
            for end in ends:
                if end and (start in syms_out or start in trace_sections_mangled) and end in syms_out:
                    lvl1print(f'{mangle(start)} -> {mangle(end)};')

        for sec in trace_sections:
            lvl1print(f'{sec.replace(".", "_")} [label = "section {sec}"];')
+#!/usr/bin/env python3
def parse_linker_script(data):
    """Placeholder: not implemented yet; ignores ``data`` and returns None."""
def link(groups):
    """Walk the symbols of all input files, group by group (unfinished).

    ``groups`` is an iterable of ``(group, files)`` pairs where ``group`` is
    truthy for files given inside a ``--start-group``/``--end-group`` pair.
    Every file is passed to ``load_symbols`` (not defined in this part of the
    file).  Nothing is resolved or returned yet.

    NOTE(review): the body of the ``symbol in defined_symbols`` branch was
    missing in the source; a no-op placeholder keeps the function
    syntactically valid.  ``found_something`` is never set, so each group is
    currently scanned exactly once.
    """
    defined_symbols = {}
    undefined_symbols = set()
    for group, files in groups:
        while True:
            found_something = False
            for fn in files:
                symbols = load_symbols(fn)
                for symbol in symbols:
                    if symbol in defined_symbols:
                        continue  # TODO: handle already-defined symbols
            # Plain files are scanned once; groups repeat until nothing new.
            if not group or not found_something:
                break
if __name__ == '__main__':
    # Command-line entry point: split the linker command line into input
    # files and --start-group/--end-group markers.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('-T', '--script', type=str, help='Linker script to use')
    parser.add_argument('-o', '--output', type=str, help='Output file to produce')
    args, rest = parser.parse_known_intermixed_args()
    print(rest)

    def addprefix(*xs):
        """Return every option in ``xs`` both bare and with a ``-Wl,`` prefix."""
        return [prefix + opt for opt in xs for prefix in ('', '-Wl,')]

    START_GROUP = addprefix('-(', '--start-group')
    END_GROUP = addprefix('-)', '--end-group')
    # GROUP_OPTS was referenced below but never defined.
    GROUP_OPTS = START_GROUP + END_GROUP
    # Keep positional arguments and the group markers; drop other options.
    input_files = [arg for arg in rest if not arg.startswith('-') or arg in GROUP_OPTS]

    def input_file_iter(input_files):
        """Yield ``(is_group, files)`` for each run of consecutive input files."""
        group = False
        files = []
        for arg in input_files:
            if arg in START_GROUP:
                assert not group, 'nested -Wl,--start-group'
                if files:
                    yield False, files
                group, files = True, []
            elif arg in END_GROUP:
                assert group, 'missing -Wl,--start-group'
                if files:
                    yield True, files
                group, files = False, []
            else:
                files.append(arg)
        assert not group, 'missing -Wl,--end-group'
        if files:
            yield False, files
+#!/usr/bin/env python3
+import re
+import subprocess
+import tempfile
+import pprint
# "archive.a(member.o)" -> ("archive.a", "(member.o)"); plain names give (name, None).
ARCHIVE_RE = r'([^(]*)(\([^)]*\))?'


def trace_source_files(linker, cmdline):
    """Link with a map file requested and scan the map's section listing.

    Runs ``linker`` with ``cmdline`` plus a ``-Wl,--Map=`` option pointing at
    a temporary file, then walks the text after the "Linker script and memory
    map" heading, printing what it recognises, and finally the text after the
    "Cross Reference Table" heading.

    NOTE(review): this function is work in progress.  It builds ``objects``,
    ``symbols``, ``sections`` and ``used_defs`` but returns nothing.  The
    f-string placeholder for the map path and the source of ``lines`` were
    missing from the source text and are reconstructed here -- confirm.
    """
    with tempfile.NamedTemporaryFile() as mapfile:
        subprocess.check_output([linker, f'-Wl,--Map={mapfile.name}', *cmdline])
        map_text = mapfile.read().decode()

    # Blank lines are dropped; trailing whitespace is removed.
    lines = [line.rstrip() for line in map_text.splitlines() if line.strip()]

    # Skip everything up to and including the section listing heading.
    idx = 0
    for idx, line in enumerate(lines[idx:], start=idx):
        #print('Dropping', line)
        if line == 'Linker script and memory map':
            break
    idx += 1

    objects = []
    symbols = {}
    sections = {}
    current_object = None
    last_offset = None
    last_symbol = None
    cont_sec = None
    cont_ind = None
    current_section = None
    for idx, line in enumerate(lines[idx:], start=idx):
        print(f'Processing >{line}')
        if line.startswith('LOAD'):
            _load, obj = line.split()
            objects.append(obj)
            continue
        if line.startswith('OUTPUT'):
            break

        m = re.match(r'^( ?)([^ ]+)? +(0x[0-9a-z]+) +(0x[0-9a-z]+)?(.*)?$', line)
        if m is None:
            # A lone name: remember it, its address may follow on the next line.
            m = re.match(r'^( ?)([^ ]+)?$', line)
            if m:
                cont_ind, cont_sec = m.groups()
            else:
                cont_ind, cont_sec = None, None
            last_offset, last_symbol = None, None
            continue

        indent, sec, offx, size, sym_or_src = m.groups()
        if sec is None:
            sec = cont_sec
        cont_sec = None
        cont_ind = None
        print(f'vals: indent={indent} sec={sec} offx={offx} size={size} sym_or_src={sym_or_src}')
        if not re.match('^[a-zA-Z_0-9<>():*]+$', sym_or_src):
            continue

        # Convert before first use: formatting the raw strings with ':x'
        # (as the section branch below does) would raise.
        if offx is not None:
            offx = int(offx, 16)
        if size is not None:
            size = int(size, 16)

        if indent == '':
            if size is not None:
                print(f'Section: {sec} 0x{size:x}')
            current_section = sec
            sections[sec] = size
            last_offset = None
            last_symbol = None
            continue

        if size is not None and sym_or_src is not None:
            # archive/object line
            archive, _member = re.match(ARCHIVE_RE, sym_or_src).groups()
            current_object = archive
            last_offset = offx
        elif sym_or_src is not None:
            assert size is None
            # The previous symbol extends up to the start of this one.
            if last_offset is not None:
                last_size = offx - last_offset
                symbols[last_symbol] = (last_size, current_section)
                print(f'Symbol: {last_symbol} 0x{last_size:x} @{current_section}')
            last_offset = offx
            last_symbol = sym_or_src
    idx += 1

    for idx, line in enumerate(lines[idx:], start=idx):
        if line == 'Cross Reference Table':
            break
    idx += 1

    # map which symbol was pulled from which object in the end
    # NOTE(review): the source iterated over all of `lines` here, ignoring the
    # position just computed; only the cross reference part is scanned now.
    used_defs = {}
    for line in lines[idx:]:
        *left, right = line.split()
        archive, _member = re.match(ARCHIVE_RE, right).groups()
        if left:
            used_defs[''.join(left)] = archive
    #pprint.pprint(symbols)
if __name__ == '__main__':
    # Command-line entry point: first argument is the linker driver, all
    # remaining arguments are passed on to it.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('linker_binary')
    parser.add_argument('linker_args', nargs=argparse.REMAINDER)
    args = parser.parse_args()

    source_files = trace_source_files(args.linker_binary, args.linker_args)
+import re
+from collections import defaultdict, namedtuple
# Record types used by Memory.
Section = namedtuple('Section', ['name', 'offset', 'objects'])
ObjectEntry = namedtuple('ObjectEntry', ['filename', 'object', 'offset', 'size'])
FileEntry = namedtuple('FileEntry', ['section', 'object', 'offset', 'length'])


class Memory:
    """A memory region together with the sections and objects placed in it.

    Attributes set by the constructor: ``name``, ``origin``, ``length`` and
    ``attrs`` as given; ``sections`` maps a top-level section name to its
    ``Section``; ``files`` maps a file name to its ``FileEntry`` list;
    ``totals`` maps a file name to the summed length of its entries.
    """

    def __init__(self, name, origin, length, attrs=''):
        # NOTE(review): the first assignment target was missing in the source
        # text; `self.name` is reconstructed from the right-hand side.
        self.name, self.origin, self.length, self.attrs = name, origin, length, attrs
        self.sections = {}
        self.files = defaultdict(list)
        self.totals = defaultdict(int)

    def add_toplevel(self, name, offx, length):
        """Register the top-level output section ``name`` starting at ``offx``.

        ``Section`` has no length field, so ``length`` is accepted but not
        stored.
        """
        # Field order is (name, offset, objects).
        self.sections[name] = Section(name, offx, [])

    def add_obj(self, name, offx, length, fn, obj):
        """Record that file ``fn`` (archive member ``obj``) contributes
        ``length`` bytes at ``offx`` in input section ``name``.

        The entry is attached to the top-level section whose name equals the
        first dotted component of ``name`` (``.text.foo`` -> ``.text``) when
        that section has been registered, and always added to ``files`` and
        ``totals``.
        """
        base_section, _sep, _subsec = name[1:].partition('.')
        base_section = '.' + base_section
        sec = self.sections.get(base_section)
        if sec is not None:
            sec.objects.append(ObjectEntry(fn, obj, offx, length))
        self.files[fn].append(FileEntry(sec, obj, offx, length))
        self.totals[fn] += length
class MapFile:
    """Parser for the text of a linker map file.

    The constructor consumes the text line by line: first the table following
    the "Memory Configuration" heading, filling ``memcfg`` (name -> Memory),
    then the section listing, recording top-level sections and per-file
    contributions on the Memory that contains their address.  Addresses
    outside every configured region go to ``defaultmem``.
    """

    def __init__(self, s):
        # Single shared iterator: every loop over `self` continues where the
        # previous one stopped, and each line is consumed exactly once.
        self._lines = iter(s.splitlines())
        self.memcfg = {}
        self.defaultmem = Memory('default', 0, 0xffffffffffffffff)
        self._parse()

    def __getitem__(self, offx_or_name):
        ''' Lookup a memory area by name or address '''
        if offx_or_name in self.memcfg:
            return self.memcfg[offx_or_name]
        if isinstance(offx_or_name, int):
            for mem in self.memcfg.values():
                if mem.origin <= offx_or_name < mem.origin + mem.length:
                    return mem
            # No configured region contains this address.
            return self.defaultmem
        raise ValueError('Invalid argument type for indexing')

    def _skip(self, regex):
        """Consume lines up to and including the first one matching ``regex``."""
        matcher = re.compile(regex)
        for line in self:
            if matcher.match(line):
                break

    def __iter__(self):
        """Iterate over the lines that have not been consumed yet."""
        return self._lines

    def _parse(self):
        """Read the memory configuration and section listing from the lines."""
        self._skip('^Memory Configuration')

        # Parse memory segmentation info
        self._skip('^Name')
        for line in self:
            if not line:
                break
            name, origin, length, *attrs = line.split()
            if not name.startswith('*'):
                self.memcfg[name] = Memory(name, int(origin, 16), int(length, 16), attrs[0] if attrs else '')

        # Parse section information
        toplevel_m = re.compile(r'^(\.[a-zA-Z0-9_.]+)\s+(0x[0-9a-fA-F]+)\s+(0x[0-9a-fA-F]+)')
        secondlevel_m = re.compile(r'^ (\.[a-zA-Z0-9_.]+)\s+(0x[0-9a-fA-F]+)\s+(0x[0-9a-fA-F]+)\s+(.*)$')
        # A section name alone on its line; the rest of the entry follows on
        # the next line.  Lines come from splitlines() and carry no newline,
        # so the pattern has to end in '$' rather than '\n'.
        secondlevel_linebreak_m = re.compile(r'^ (\.[a-zA-Z0-9_.]+)$')
        filelike = re.compile(r'^(/?[^()]*\.[a-zA-Z0-9_-]+)(\(.*\))?')

        linebreak_section = None
        for line in self:
            # Toplevel section
            match = toplevel_m.match(line)
            if match:
                name, offx, length = match.groups()
                offx, length = int(offx, 16), int(length, 16)
                self[offx].add_toplevel(name, offx, length)

            match = secondlevel_linebreak_m.match(line)
            if match:
                linebreak_section, = match.groups()
                continue

            if linebreak_section:
                # Re-join a wrapped entry with the name from the previous line.
                line = ' {} {}'.format(linebreak_section, line)
                linebreak_section = None

            # Second-level section
            match = secondlevel_m.match(line)
            if match:
                name, offx, length, misc = match.groups()
                match = filelike.match(misc)
                if match:
                    fn, obj = match.groups()
                    obj = obj.strip('()') if obj else None
                    offx, length = int(offx, 16), int(length, 16)
                    self[offx].add_obj(name, offx, length, fn, obj)
if __name__ == '__main__':
    # Command-line entry point: print, per memory region, the total size
    # contributed by each file (largest first) followed by that file's
    # individual entries (largest first).
    import argparse
    parser = argparse.ArgumentParser(description='Parser GCC map file')
    parser.add_argument('mapfile', type=argparse.FileType('r'), help='The GCC .map file to parse')
    parser.add_argument('-m', '--memory', type=str, help='The memory segments to print, comma-separated')
    args = parser.parse_args()

    # NOTE(review): the constructor argument was missing in the source text;
    # reading the opened map file is reconstructed from context.
    with args.mapfile:
        mf = MapFile(args.mapfile.read())

    mems = args.memory.split(',') if args.memory else mf.memcfg.keys()

    for name in mems:
        mem = mf.memcfg[name]
        print('Symbols by file for memory', name)
        for tot, fn in sorted(((tot, fn) for fn, tot in mem.totals.items()), reverse=True):
            print(' {:>8} {}'.format(tot, fn))
            # Stable sort by length, then reversed: largest entries first.
            for _sec, obj, offx, length in reversed(sorted(mem.files[fn], key=lambda e: e.length)):
                print(' {:>8} {:>#08x} {}'.format(length, offx, obj))
        #print('{:>16} 0x{:016x} 0x{:016x} ({:>24}) {}'.format(name, origin, length, length, attrs))