summaryrefslogtreecommitdiffstats
path: root/scripts/checkkconfigsymbols.py
blob: 0cae73b5c92592792c37ca7cacf78405b10812a6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
#!/usr/bin/env python2

"""Find Kconfig symbols that are referenced but not defined."""

# (c) 2014-2016 Valentin Rothberg <valentinrothberg@gmail.com>
# (c) 2014 Stefan Hengelein <stefan.hengelein@fau.de>
#
# Licensed under the terms of the GNU GPL License version 2


import difflib
import os
import re
import signal
import subprocess
import sys
from multiprocessing import Pool, cpu_count
from optparse import OptionParser
from subprocess import Popen, PIPE, STDOUT


# regex expressions
# Operators and parentheses that may appear in a Kconfig expression.
OPERATORS = r"&|\(|\)|\||\!"
# A symbol name: word characters containing at least two uppercase letters or
# digits (the group must repeat two or more times).
FEATURE = r"(?:\w*[A-Z0-9]\w*){2,}"
# A "config" or "menuconfig" line; the symbol being defined is captured.
DEF = r"^\s*(?:menu){,1}config\s+(" + FEATURE + r")\s*"
# One or more operators, whitespace characters or symbol names.
EXPR = r"(?:" + OPERATORS + r"|\s|" + FEATURE + r")+"
# A "default" statement with an optional trailing "if" part.
DEFAULT = r"default\s+.*?(?:if\s.+){,1}"
# A line starting with "if", "select", "depends on" or a default statement,
# followed by whitespace and an expression.
STMT = r"^\s*(?:if|select|depends\s+on|(?:" + DEFAULT + r"))\s+" + EXPR
# A "CONFIG_<symbol>" reference, optionally preceded by a single "D"
# (presumably for "-DCONFIG_..." compiler flags); captures the symbol name
# without the prefix.
SOURCE_FEATURE = r"(?:\W|\b)+[D]{,1}CONFIG_(" + FEATURE + r")"

# regex objects
# Paths ending in "Kconfig", optionally followed by word, '.', '+' or '-'
# characters.
REGEX_FILE_KCONFIG = re.compile(r".*Kconfig[\.\w+\-]*$")
# Symbol names that start and end at a word boundary.
REGEX_FEATURE = re.compile(r'(?!\B)' + FEATURE + r'(?!\B)')
REGEX_SOURCE_FEATURE = re.compile(SOURCE_FEATURE)
REGEX_KCONFIG_DEF = re.compile(DEF)
REGEX_KCONFIG_EXPR = re.compile(EXPR)
REGEX_KCONFIG_STMT = re.compile(STMT)
# An indented line consisting only of "help" or "---help---".
REGEX_KCONFIG_HELP = re.compile(r"^\s+(help|---help---)\s*$")
# Used with search(): succeeds when the last character is alphanumeric.
REGEX_FILTER_FEATURES = re.compile(r"[A-Za-z0-9]$")
# A hexadecimal ("0x...") or decimal number.
REGEX_NUMERIC = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+")
# A double-quoted string (shortest possible match).
REGEX_QUOTES = re.compile("(\"(.*?)\")")


def parse_options():
    """The user interface of this module.

    Parse the command line with optparse, validate the given options and
    return the resulting options object.

    The process exits via sys.exit() with an explanatory message when
    --commit and --diff are combined, when --diff is not of the form
    'commit1..commit2', when a commit-based check is requested on a dirty Git
    tree without --force, or when the --ignore pattern is no valid regex.
    """
    usage = ("%prog [options]\n\n"
             "Run this tool to detect Kconfig symbols that are referenced but "
             "not defined in\nKconfig.  The output of this tool has the "
             "format \'Undefined symbol\\tFile list\'\n\n"
             "If no option is specified, %prog will default to check your\n"
             "current tree.  Please note that specifying commits will "
             "\'git reset --hard\'\nyour current tree!  You may save "
             "uncommitted changes to avoid losing data.")

    parser = OptionParser(usage=usage)

    parser.add_option('-c', '--commit', dest='commit', action='store',
                      default="",
                      help="Check if the specified commit (hash) introduces "
                           "undefined Kconfig symbols.")

    parser.add_option('-d', '--diff', dest='diff', action='store',
                      default="",
                      help="Diff undefined symbols between two commits.  The "
                           "input format bases on Git log's "
                           "\'commit1..commit2\'.")

    parser.add_option('-f', '--find', dest='find', action='store_true',
                      default=False,
                      help="Find and show commits that may cause symbols to be "
                           "missing.  Required to run with --diff.")

    parser.add_option('-i', '--ignore', dest='ignore', action='store',
                      default="",
                      help="Ignore files matching this pattern.  Note that "
                           "the pattern needs to be a Python regex.  To "
                           "ignore defconfigs, specify -i '.*defconfig'.")

    parser.add_option('-s', '--sim', dest='sim', action='store', default="",
                      help="Print a list of maximum 10 string-similar symbols.")

    parser.add_option('', '--force', dest='force', action='store_true',
                      default=False,
                      help="Reset current Git tree even when it's dirty.")

    parser.add_option('', '--no-color', dest='color', action='store_false',
                      default=True,
                      help="Don't print colored output. Default when not "
                           "outputting to a terminal.")

    (opts, _) = parser.parse_args()

    if opts.commit and opts.diff:
        sys.exit("Please specify only one option at once.")

    if opts.diff and not re.match(r"^[\w\-\.]+\.\.[\w\-\.]+$", opts.diff):
        sys.exit("Please specify valid input in the following format: "
                 "\'commit1..commit2\'")

    # commit-based checks hard-reset the tree, so refuse to touch a dirty one
    # unless the user explicitly asked for it
    if opts.commit or opts.diff:
        if not opts.force and tree_is_dirty():
            sys.exit("The current Git tree is dirty (see 'git status').  "
                     "Running this script may\ndelete important data since it "
                     "calls 'git reset --hard' for some performance\nreasons. "
                     " Please run this script in a clean Git tree or pass "
                     "'--force' if you\nwant to ignore this warning and "
                     "continue.")

    # --find is only honoured together with --diff
    if opts.commit:
        opts.find = False

    if opts.ignore:
        # only catch the errors raised for an invalid pattern, so that
        # KeyboardInterrupt and SystemExit are not swallowed here
        try:
            re.compile(opts.ignore)
        except (re.error, OverflowError):
            sys.exit("Please specify a valid Python regex.")

    return opts


def main():
    """Main function of this module.

    Depending on the command-line options this either prints symbols that are
    string-similar to a given one, reports undefined symbols introduced by a
    commit or a commit range, or reports all undefined symbols of the current
    tree.  For commit-based checks the Git tree is hard-reset to the commits
    in question and reset to the original HEAD afterwards, even if the
    analysis fails or is interrupted.
    """
    opts = parse_options()

    # yel() and red() read this module-level flag
    global color
    color = opts.color and sys.stdout.isatty()

    if opts.sim and not opts.commit and not opts.diff:
        sims = find_sims(opts.sim, opts.ignore)
        if sims:
            print("%s: %s" % (yel("Similar symbols"), ', '.join(sims)))
        else:
            print("%s: no similar symbols found" % yel("Similar symbols"))
        sys.exit(0)

    # dictionary of (un)defined symbols
    defined = {}
    undefined = {}

    if opts.commit or opts.diff:
        head = get_head()

        # get commit range
        if opts.commit:
            commit_a = opts.commit + "~"
            commit_b = opts.commit
        else:
            split = opts.diff.split("..")
            commit_a = split[0]
            commit_b = split[1]

        try:
            # get undefined items before the commit
            execute("git reset --hard %s" % commit_a)
            undefined_a, _ = check_symbols(opts.ignore)

            # get undefined items for the commit
            execute("git reset --hard %s" % commit_b)
            undefined_b, defined = check_symbols(opts.ignore)
        finally:
            # reset to head, no matter whether the analysis succeeded
            execute("git reset --hard %s" % head)

        # report cases that are present for the commit but not before
        for feature in sorted(undefined_b):
            if feature not in undefined_a:
                # feature has not been undefined before
                files = sorted(undefined_b.get(feature))
            else:
                # only report new files that reference the undefined feature
                files = sorted(undefined_b.get(feature) -
                               undefined_a.get(feature))
            if files:
                undefined[feature] = files

    # default to check the entire tree
    else:
        undefined, defined = check_symbols(opts.ignore)

    # now print the output
    for feature in sorted(undefined):
        print(red(feature))

        files = sorted(undefined.get(feature))
        print("%s: %s" % (yel("Referencing files"), ", ".join(files)))

        sims = find_sims(feature, opts.ignore, defined)
        sims_out = yel("Similar symbols")
        if sims:
            print("%s: %s" % (sims_out, ', '.join(sims)))
        else:
            print("%s: %s" % (sims_out, "no similar symbols found"))

        if opts.find:
            print("%s:" % yel("Commits changing symbol"))
            commits = find_commits(feature, opts.diff)
            if commits:
                for commit in commits:
                    # "<abbreviated hash> <subject>"; the subject may be empty
                    sha, _, subject = commit.partition(" ")
                    print("\t- %s (\"%s\")" % (yel(sha), subject))
            else:
                print("\t- no commit found")
        print("")  # new line


def yel(string):
    """
    Return %string wrapped in the ANSI escape codes for yellow, or %string
    itself if the module-level color flag is not set.
    """
    if not color:
        return string
    return "\033[33m%s\033[0m" % string


def red(string):
    """
    Return %string wrapped in the ANSI escape codes for red, or %string
    itself if the module-level color flag is not set.
    """
    if not color:
        return string
    return "\033[31m%s\033[0m" % string


def execute(cmd):
    """Execute %cmd and return its output (stdout and stderr combined).

    %cmd is split at single spaces into an argument list and run without a
    shell.  The process exits via sys.exit() with an error message if the
    command returns a non-zero exit status or cannot be started at all.
    """
    cmdlist = cmd.split(" ")
    try:
        return subprocess.check_output(cmdlist, stderr=STDOUT, shell=False)
    except (subprocess.CalledProcessError, OSError) as fail:
        # OSError covers a missing or non-executable program
        sys.exit("Failed to execute %s\n%s" % (cmd, fail))


def find_commits(symbol, diff):
    """Return the non-empty 'git log --pretty=oneline' lines of all commits in
    the range %diff that are selected by 'git log -G %symbol'."""
    cmd = "git log --pretty=oneline --abbrev-commit -G %s %s" % (symbol, diff)
    found = []
    for entry in execute(cmd).split("\n"):
        if entry:
            found.append(entry)
    return found


def tree_is_dirty():
    """Return true if the current working tree is dirty (i.e., if any file has
    been added, deleted, modified, renamed or copied but not committed).

    The decision is based on the two status characters at the start of each
    line printed by 'git status --porcelain'."""
    stdout = execute("git status --porcelain")
    # examine the output line by line; iterating over the string itself would
    # yield single characters and let letters in file names count as status
    for line in stdout.splitlines():
        if re.search(r"[URMADC]", line[:2]):
            return True
    return False


def get_head():
    """Return the output of 'git rev-parse HEAD' (the hash of the current
    HEAD commit) without surrounding newlines."""
    return execute("git rev-parse HEAD").strip('\n')


def partition(lst, size):
    """Partition list @lst into @size evenly-sized lists.

    The elements are dealt out round-robin: the i-th returned list holds the
    elements at index i, i + @size, i + 2 * @size and so on.  The returned
    lists differ in length by at most one element and may be empty if @lst
    has fewer than @size elements."""
    return [lst[i::size] for i in range(size)]


def init_worker():
    """Install a handler that makes the calling process ignore SIGINT."""
    ignore_handler = signal.SIG_IGN
    signal.signal(signal.SIGINT, ignore_handler)


def find_sims(symbol, ignore, defined=None):
    """Return a sorted list of max. ten Kconfig symbols that are
    string-similar to @symbol.

    If @defined is a non-empty collection of symbol names, the candidates are
    taken from it.  Otherwise all Kconfig files of the Git tree are parsed (in
    a pool of worker processes) to collect the defined symbols; @ignore is
    passed on to parse_kconfig_files() in that case.  @defined itself is
    never modified."""
    if defined:
        return sorted(difflib.get_close_matches(symbol, set(defined), 10))

    kfiles = [gitfile for gitfile in get_files()
              if REGEX_FILE_KCONFIG.match(gitfile)]
    arglist = [(part, ignore) for part in partition(kfiles, cpu_count())]

    # collect into a fresh local list instead of a shared default argument
    symbols = []
    pool = Pool(cpu_count(), init_worker)
    try:
        for res in pool.map(parse_kconfig_files, arglist):
            symbols.extend(res[0])
    finally:
        # release the worker processes in any case
        pool.terminate()
        pool.join()

    return sorted(difflib.get_close_matches(symbol, set(symbols), 10))


def get_files():
    """Return a list of all files in the current git directory.

    The list is taken from 'git ls-files'.  Paths containing ".git",
    "ChangeLog" or ".log", directories and everything below "tools/" are
    left out."""
    # use 'git ls-files' to get the worklist
    stdout = execute("git ls-files")

    files = []
    for gitfile in stdout.split("\n"):
        if not gitfile:
            # skip the empty string after the trailing newline and avoid a
            # bogus "" entry when git lists no files at all
            continue
        if ".git" in gitfile or "ChangeLog" in gitfile or      \
                ".log" in gitfile or os.path.isdir(gitfile) or \
                gitfile.startswith("tools/"):
            continue
        files.append(gitfile)
    return files


def check_symbols(ignore):
    """Find undefined Kconfig symbols.

    Return the tuple produced by check_symbols_helper(): a dict with each
    undefined symbol as key and the referencing files as value, and the
    collection of defined symbols.  Files matching %ignore are not checked for
    undefined symbols.

    The work is done in a pool of worker processes, which is shut down before
    returning.  The process exits with status 1 on a keyboard interrupt."""
    pool = Pool(cpu_count(), init_worker)
    try:
        return check_symbols_helper(pool, ignore)
    except KeyboardInterrupt:
        sys.exit(1)
    finally:
        # runs on success, on interrupt and on any other error, so that no
        # worker processes are left behind
        pool.terminate()
        pool.join()


def check_symbols_helper(pool, ignore):
    """Helper method for check_symbols().  Used to catch keyboard interrupts in
    check_symbols() in order to properly terminate running worker processes.

    The files reported by get_files() are split into Kconfig files and source
    files and parsed with the worker processes of @pool.  Source files
    matching the @ignore pattern are not parsed.

    Return a tuple (undefined, defined): undefined maps each referenced but
    undefined symbol to the set of files referencing it, defined is the set
    of all symbols defined in Kconfig files."""
    source_files = []
    kconfig_files = []
    defined_features = []
    referenced_features = dict()  # {file: [features]}

    for gitfile in get_files():
        if REGEX_FILE_KCONFIG.match(gitfile):
            kconfig_files.append(gitfile)
        elif ignore and re.match(ignore, gitfile):
            # skip source files that match the ignore pattern
            continue
        else:
            source_files.append(gitfile)

    # parse source files
    arglist = partition(source_files, cpu_count())
    for res in pool.map(parse_source_files, arglist):
        referenced_features.update(res)

    # parse kconfig files
    arglist = [(part, ignore)
               for part in partition(kconfig_files, cpu_count())]
    for res in pool.map(parse_kconfig_files, arglist):
        defined_features.extend(res[0])
        referenced_features.update(res[1])
    defined_features = set(defined_features)

    # inverse mapping of referenced_features to dict(feature: set(files))
    inv_map = dict()
    for _file, features in referenced_features.items():
        for feature in features:
            inv_map.setdefault(feature, set()).add(_file)
    referenced_features = inv_map

    undefined = {}  # {feature: set(files)}
    for feature in sorted(referenced_features):
        # filter some false positives
        if feature in ("FOO", "BAR", "FOO_BAR", "XXX"):
            continue
        if feature in defined_features:
            continue
        if feature.endswith("_MODULE"):
            # avoid false positives for kernel modules
            if feature[:-len("_MODULE")] in defined_features:
                continue
        undefined[feature] = referenced_features.get(feature)
    return undefined, defined_features


def parse_source_files(source_files):
    """Run parse_source_file() on every file in @source_files and return a
    dictionary mapping each file to the list of Kconfig symbols it
    references."""
    return dict((sfile, parse_source_file(sfile)) for sfile in source_files)


def parse_source_file(sfile):
    """Return the list of Kconfig features referenced in @sfile.

    The list is empty if @sfile does not exist.  Only lines containing
    "CONFIG_" are examined, and matches that do not end in an alphanumeric
    character are dropped."""
    found = []
    if not os.path.exists(sfile):
        return found

    with open(sfile, "r") as stream:
        content = stream.readlines()

    for text in content:
        if "CONFIG_" in text:
            found.extend(name
                         for name in REGEX_SOURCE_FEATURE.findall(text)
                         if REGEX_FILTER_FEATURES.search(name))
    return found


def get_features_in_line(line):
    """Return a list of all Kconfig features that REGEX_FEATURE finds in
    @line."""
    matches = REGEX_FEATURE.findall(line)
    return matches


def parse_kconfig_files(args):
    """Parse a batch of Kconfig files.

    @args is a tuple of a list of files and the @ignore pattern.  Return a
    tuple of the list of symbols defined in the files and a dictionary that
    maps each file to the symbols it references.  Files matching the ignore
    pattern contribute definitions but no references."""
    files = args[0]
    pattern = args[1]
    all_defined = []
    refs_by_file = {}

    for kfile in files:
        parsed = parse_kconfig_file(kfile)
        all_defined += parsed[0]
        # references of files matching the ignore pattern are not collected
        ignored = pattern and re.match(pattern, kfile)
        if not ignored:
            refs_by_file[kfile] = parsed[1]
    return (all_defined, refs_by_file)


def parse_kconfig_file(kfile):
    """Parse @kfile and return a tuple of two lists: the features defined in
    the file and the features referenced in its statements.

    Both lists are empty if @kfile does not exist.  The content of help texts
    is skipped until the next definition, and numeric values are not
    reported as references."""
    lines = []
    defined = []
    references = []
    skip = False

    if not os.path.exists(kfile):
        return defined, references

    with open(kfile, "r") as stream:
        lines = stream.readlines()

    total = len(lines)
    for i, raw in enumerate(lines):
        line = raw.strip('\n')
        line = line.split("#")[0]  # ignore comments

        definition = REGEX_KCONFIG_DEF.match(line)
        if definition:
            defined.append(definition.group(1))
            skip = False
        elif REGEX_KCONFIG_HELP.match(line):
            skip = True
        elif skip:
            # ignore content of help messages
            pass
        elif REGEX_KCONFIG_STMT.match(line):
            line = REGEX_QUOTES.sub("", line)
            features = get_features_in_line(line)
            # multi-line statements: follow the continuation lines, but never
            # read past the end of the file if the last line ends with '\'
            j = i
            while line.endswith("\\") and j + 1 < total:
                j += 1
                line = lines[j].strip('\n')
                features.extend(get_features_in_line(line))
            for feature in set(features):
                if REGEX_NUMERIC.match(feature):
                    # ignore numeric values
                    continue
                references.append(feature)

    return defined, references


# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()