summaryrefslogtreecommitdiffstats
path: root/tools/symalyzer.py
blob: cff21f9f9339e93d1b06db4d454f6170579f37c2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
#!/usr/bin/python3
#
# 2019 by David Lamparter, placed in public domain
#
# This tool generates a report of possibly unused symbols in the build.  It's
# particularly useful for libfrr to find bitrotting functions that aren't even
# used anywhere anymore.
#
# Note that the tool can't distinguish between "a symbol is completely unused"
# and "a symbol is used only in its file" since file-internal references are
# invisible in nm output.  However, the compiler will warn you if a static
# symbol is unused.
#
# This tool is only tested on Linux; it probably needs `nm` from GNU binutils
# (as opposed to BSD `nm`).  pyelftools could be used instead, but that's a
# lot of extra work.
#
# This is a developer tool, please don't put it in any packages :)

import sys, os, subprocess
import re
from collections import namedtuple

sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'python'))

from makevars import MakeVars

SymRowBase = namedtuple(
    'SymRow',
    ['target', 'object', 'name', 'address', 'klass', 'typ', 'size', 'line', 'section', 'loc'],
)


class SymRow(SymRowBase):
    '''
    one symbol table row as printed by `nm`, plus the link target and the
    object file it was read from

    (deliberately no __slots__: extra attributes get attached to rows later)
    '''

    # targets whose file name looks like a shared/libtool library
    lib_re = re.compile(r'/lib[^/]+\.(so|la)$')

    def is_global(self):
        '''
        True if the class letter is upper case or one of "u", "v", "w"
        '''
        if self.klass in 'uvw':
            return True
        return self.klass.isupper()

    def scope(self):
        '''
        return the target name for non-library targets, None for libraries

        None stands for "global" scope.
        '''
        in_library = self.lib_re.search(self.target) is not None
        return None if in_library else self.target

    def is_export(self):
        '''
        FRR-specific list of symbols which are considered "externally used"

        e.g. hooks are by design APIs for external use, same for qobj_t_*
        frr_inet_ntop is here because it's used through an ELF alias to
        "inet_ntop()"
        '''
        exported_names = ('main', 'frr_inet_ntop', '_libfrr_version')
        exported_prefixes = ('_hook_', 'qobj_t_')
        return self.name in exported_names or self.name.startswith(exported_prefixes)

class Symbols(dict):
    '''
    dict of all symbols in all libs & executables

    Keys are symbol names, values are Symbols.Symbol objects.  load() is
    called once per link target to read in `nm` output; evaluate() then walks
    all collected symbols and fills in self.extsyms and self.report.
    '''

    # "Symbols from <file>:" line that precedes each symbol table in nm output
    from_re = re.compile(r'^Symbols from (.*?):$')
    # libtool .lo / .la file name; group 1 is the directory (including the
    # trailing slash), group 2 the file name without its extension
    lt_re = re.compile(r'^(.*/)([^/]+)\.l[oa]$')

    def __init__(self):
        super().__init__()

    class ReportSym(object):
        '''
        base class for a single report entry, wrapping the defining nm row

        Subclasses only add the idshort / idlong / title class attributes
        (presumably consumed by the HTML template -- not visible from here).
        '''
        def __init__(self, sym):
            self.sym = sym
        def __repr__(self):
            return '<%-25s %-40s [%s]>' % (self.__class__.__name__ + ':', self.sym.name, self.sym.loc)
        def __lt__(self, other):
            # report entries sort by symbol name
            return self.sym.name.__lt__(other.sym.name)

    class ReportSymCouldBeStaticAlreadyLocal(ReportSym):
        idshort = 'Z'
        idlong = 'extrastatic'
        title = "symbol is local to library, but only used in its source file (make static?)"
    class ReportSymCouldBeStatic(ReportSym):
        idshort = 'S'
        idlong = 'static'
        title = "symbol is only used in its source file (make static?)"
    class ReportSymCouldBeLibLocal(ReportSym):
        idshort = 'L'
        idlong = 'liblocal'
        title = "symbol is only used inside of library"
    class ReportSymModuleAPI(ReportSym):
        idshort = 'A'
        idlong = 'api'
        title = "symbol (in executable) is referenced externally from a module"

    class Symbol(object):
        '''
        everything known about one symbol name

        defs maps scope (row.scope(), None meaning "global") to the list of
        rows defining the symbol there; refs lists the rows referencing it
        as undefined.
        '''
        def __init__(self, name):
            super().__init__()
            self.name = name
            self.defs = {}
            self.refs = []

        def process(self, row):
            '''
            file one nm row as either a reference or a definition
            '''
            scope = row.scope()
            if row.section == '*UND*':
                self.refs.append(row)
            else:
                self.defs.setdefault(scope, []).append(row)

        def evaluate(self, out):
            '''
            generate output report

            invoked after all object files have been read in, so it can look
            at inter-object-file relationships

            out is the owning Symbols object; results are added to
            out.extsyms (names without any definition) and out.report
            (object file name -> list of ReportSym entries).
            '''
            if not self.defs:
                out.extsyms.add(self.name)
                return

            for scopename, symdefs in self.defs.items():
                common_defs = [symdef for symdef in symdefs if symdef.section == '*COM*']
                proper_defs = [symdef for symdef in symdefs if symdef.section != '*COM*']

                if len(proper_defs) > 1:
                    # more than one real definition in the same scope; print
                    # it and give up on this symbol entirely
                    print(self.name, ' DUPLICATE')
                    print('\tD: %s %s' % (scopename, '\n\t\t'.join([repr(s) for s in symdefs])))
                    for syms in self.refs:
                        print('\tR: %s' % (syms, ))
                    return

                if proper_defs:
                    primary_def = proper_defs[0]
                elif common_defs:
                    # "common" = global variables without initializer;
                    # they can occur in multiple .o files and the linker will
                    # merge them into one variable/storage location.
                    primary_def = common_defs[0]
                else:
                    # undefined symbol, e.g. libc
                    continue

                # defined in a non-library target but referenced from some
                # other target that is a libtool (.la) library
                if scopename is not None and any(
                        ref.target != primary_def.target and ref.target.endswith('.la')
                        for ref in self.refs):
                    outobj = out.report.setdefault(primary_def.object, [])
                    outobj.append(out.ReportSymModuleAPI(primary_def))

                if not self.refs:
                    # no object file references this symbol at all
                    if primary_def.is_export():
                        continue
                    outobj = out.report.setdefault(primary_def.object, [])
                    if primary_def.visible:
                        outobj.append(out.ReportSymCouldBeStatic(primary_def))
                    else:
                        outobj.append(out.ReportSymCouldBeStaticAlreadyLocal(primary_def))
                    continue

                if scopename is None and primary_def.visible:
                    # lib symbol that is exported, yet every reference comes
                    # from the defining target itself
                    if all(ref.target == primary_def.target for ref in self.refs):
                        outobj = out.report.setdefault(primary_def.object, [])
                        outobj.append(out.ReportSymCouldBeLibLocal(primary_def))


    def evaluate(self):
        '''
        (re)build self.extsyms and self.report from all loaded symbols
        '''
        self.extsyms = set()
        self.report = {}

        for sym in self.values():
            sym.evaluate(self)

    def load(self, target, files):
        '''
        run `nm` for one link target and merge the result into this dict

        target -- name of the link target (the caller passes items of the
                  automake *_PROGRAMS / *_LTLIBRARIES variables)
        files  -- object files belonging to the target; libtool .lo/.la
                  names are mapped to the real objects under .libs/

        Each row gets a "visible" attribute telling whether the symbol name
        is among the defined global symbols of the linked binary.  nm
        failures are not treated as errors; nm's own messages go to stderr.
        '''
        def libtoolmustdie(fn):
            # dir/foo.lo -> dir/.libs/foo.o; anything else is kept as is
            m = self.lt_re.match(fn)
            if m is None:
                return fn
            return m.group(1) + '.libs/' + m.group(2) + '.o'

        def libtooltargetmustdie(fn):
            # dir/libfoo.la -> dir/.libs/libfoo.so, dir/prog -> dir/.libs/prog
            m = self.lt_re.match(fn)
            if m is None:
                a, b = fn.rsplit('/', 1)
                return '%s/.libs/%s' % (a, b)
            return m.group(1) + '.libs/' + m.group(2) + '.so'

        # deduplicate; sorted so that nm's input order (and with it the order
        # of report entries) does not depend on set iteration order
        files = sorted(set([libtoolmustdie(fn) for fn in files]))

        def parse_nm_output(text):
            '''
            generator yielding a SymRow for each global symbol in nm's
            "sysv" format output
            '''
            filename = None
            # source locations are reported relative to the parent of the
            # directory containing this script
            path_rel_to = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

            for line in text.split('\n'):
                if line.strip() == '':
                    continue
                m = self.from_re.match(line)
                if m is not None:
                    filename = m.group(1)
                    continue
                # column header line; also requiring that there is no '|'
                # keeps symbols whose name happens to start with "Name"
                if line.startswith('Name') and '|' not in line:
                    continue

                items = [i.strip() for i in line.split('|')]
                loc = None
                if '\t' in items[-1]:
                    # with -l, "<file>:<line>" follows the section after a tab
                    items[-1], loc = items[-1].split('\t', 1)
                    fn, lno = loc.rsplit(':', 1)
                    fn = os.path.relpath(fn, path_rel_to)
                    loc = '%s:%s' % (fn, lno)

                # address and size columns are hex, possibly empty
                items[1] = int(items[1] if items[1] != '' else '0', 16)
                items[4] = int(items[4] if items[4] != '' else '0', 16)
                items.append(loc)
                row = SymRow(target, filename, *items)

                if row.section == '.group' or row.name == '_GLOBAL_OFFSET_TABLE_':
                    continue
                if not row.is_global():
                    continue

                yield row

        # the parser matches nm's English "Symbols from" / "Name" lines, so
        # make sure nm does not print translated ones
        nm_env = dict(os.environ, LC_ALL='C')

        # the actual symbol report uses output from the individual object files
        # (e.g. lib/.libs/foo.o), but we also read the linked binary (e.g.
        # lib/.libs/libfrr.so) to determine which symbols are actually visible
        # in the linked result (this covers ELF "hidden"/"internal" linkage)

        libfile = libtooltargetmustdie(target)
        nmlib = subprocess.Popen(['nm', '-l', '-g', '--defined-only', '-f', 'sysv', libfile],
                                 stdout = subprocess.PIPE, env = nm_env)
        # nm -l prints source paths, which need not be ASCII; never fail on
        # undecodable bytes
        out = nmlib.communicate()[0].decode('utf-8', errors = 'replace')

        visible_syms = set(row.name for row in parse_nm_output(out))

        if not files:
            # without file arguments nm would go looking for "a.out"
            return

        nm = subprocess.Popen(['nm', '-l', '-f', 'sysv'] + files,
                              stdout = subprocess.PIPE, env = nm_env)
        out = nm.communicate()[0].decode('utf-8', errors = 'replace')

        for row in parse_nm_output(out):
            row.visible = row.name in visible_syms
            sym = self.get(row.name)
            if sym is None:
                sym = self[row.name] = self.Symbol(row.name)
            sym.process(row)


def write_html_report(syms):
    '''
    render the evaluated report to symalyzer_report.html

    syms -- Symbols object on which evaluate() has been run (syms.report is
            read here)

    The jinja2 template "symalyzer.html" is loaded from the directory this
    script lives in; output goes to the current working directory.  Without
    jinja2 nothing is written.  Afterwards, a copy of jquery is fetched next
    to the report if it isn't there yet; this is best-effort and any failure
    only results in a message on stderr.
    '''
    try:
        import jinja2
    except ImportError:
        sys.stderr.write('jinja2 could not be imported, not writing HTML report!\n')
        return

    self_path = os.path.dirname(os.path.abspath(__file__))
    jenv = jinja2.Environment(loader=jinja2.FileSystemLoader(self_path))
    template = jenv.get_template('symalyzer.html')

    # group object files by directory, ignoring libtool's .libs/ component;
    # an object file name without any directory ends up in group ''
    dirgroups = {}
    for fn, reports in syms.report.items():
        dirname = fn.replace('.libs/', '').rpartition('/')[0]
        dirgroups.setdefault(dirname, {})[fn] = reports

    # descriptions for nm's symbol class letters, handed to the template
    klasses = {
        'T': 'code / plain old regular function (Text)',
        'D': 'global variable, read-write, with nonzero initializer (Data)',
        'B': 'global variable, read-write, with zero initializer (BSS)',
        'C': 'global variable, read-write, with zero initializer (Common)',
        'R': 'global variable, read-only (Rodata)',
    }

    # write to a temporary name first so a partial report never replaces a
    # complete one
    with open('symalyzer_report.html.tmp', 'w') as fd:
        fd.write(template.render(dirgroups = dirgroups, klasses = klasses))
    os.rename('symalyzer_report.html.tmp', 'symalyzer_report.html')

    if not os.path.exists('jquery-3.4.1.min.js'):
        url = 'https://code.jquery.com/jquery-3.4.1.min.js'
        failmsg = 'failed -- please download jquery-3.4.1.min.js and put it next to the HTML report\n'
        sys.stderr.write(
            'trying to grab a copy of jquery from %s\nif this fails, please get it manually (the HTML output is done.)\n' % (url))

        # the report itself is finished at this point, so neither a missing
        # "requests" module nor a network problem should end in a traceback
        try:
            import requests
        except ImportError:
            sys.stderr.write(failmsg)
            return

        try:
            r = requests.get(url, timeout = 30)
        except requests.RequestException:
            sys.stderr.write(failmsg)
            return

        if r.status_code != 200:
            sys.stderr.write(failmsg)
        else:
            # store the bytes exactly as served rather than re-encoding the
            # decoded text with the locale's encoding
            with open('jquery-3.4.1.min.js.tmp', 'wb') as fd:
                fd.write(r.content)
            os.rename('jquery-3.4.1.min.js.tmp', 'jquery-3.4.1.min.js')
            sys.stderr.write('done.\n')

# characters automake leaves untouched when deriving variable names
_AUTOMAKE_KEEP = frozenset(
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '0123456789_@')

def automake_escape(s):
    '''
    convert a target name into the prefix automake uses for its per-target
    variables, e.g. "lib/libfrr.la" -> "lib_libfrr_la"

    automake turns every character other than letters, digits, "@" and "_"
    into an underscore, so this also covers names containing e.g. "-" or
    "+", not just "." and "/".
    '''
    return ''.join(ch if ch in _AUTOMAKE_KEEP else '_' for ch in s)

if __name__ == '__main__':
    # Script entry point: collect all program/library targets from the
    # Makefile variables, load the symbols of each target's object files,
    # then print the text report and write the HTML one.
    mv = MakeVars()

    in_build_tree = os.path.exists('config.version') and os.path.exists('lib/.libs/libfrr.so')
    if not in_build_tree:
        sys.stderr.write('please execute this script in the root directory of an FRR build tree\n')
        sys.stderr.write('./configure && make need to have completed successfully\n')
        sys.exit(1)

    amtargets = ['bin_PROGRAMS', 'sbin_PROGRAMS', 'lib_LTLIBRARIES', 'module_LTLIBRARIES']
    mv.getvars(amtargets)

    # every target listed in the variables above, except tools/ssd
    targets = []
    for amtarget in amtargets:
        for candidate in mv[amtarget].strip().split():
            if candidate != 'tools/ssd':
                targets.append(candidate)

    def ldadd_archives(target):
        # static archives (*.a) named in the target's LDADD; words starting
        # with "-" are linker options and are skipped
        words = mv['%s_LDADD' % automake_escape(target)].strip().split()
        return [word for word in words
                if not word.startswith('-') and word.endswith('.a')]

    mv.getvars(['%s_LDADD' % automake_escape(name) for name in targets])

    # the object lists are needed for the targets themselves as well as for
    # each static archive they link in
    ldobjs = targets[:]
    for name in targets:
        ldobjs.extend(ldadd_archives(name))

    mv.getvars(['%s_OBJECTS' % automake_escape(name) for name in ldobjs])

    syms = Symbols()

    for name in targets:
        objs = mv['%s_OBJECTS' % automake_escape(name)].strip().split()
        for archive in ldadd_archives(name):
            objs.extend(mv['%s_OBJECTS' % automake_escape(archive)].strip().split())

        sys.stderr.write('processing %s...\n' % name)
        sys.stderr.flush()
        syms.load(name, objs)

    syms.evaluate()

    # plain text report on stdout, grouped by object file
    for objname, entries in sorted(syms.report.items()):
        print('%s:' % objname)
        for entry in entries:
            print('\t%r' % entry)

    write_html_report(syms)