diff options
author | Richard Levitte <levitte@openssl.org> | 2019-10-02 13:16:48 +0200 |
---|---|---|
committer | Richard Levitte <levitte@openssl.org> | 2019-10-02 19:26:24 +0200 |
commit | 705128b0f0dbc82db9e7b90aa4103eab9a5ce10e (patch) | |
tree | 06879b05c7f2f1369a48f1fd19166a800b7a94a4 /util/find-doc-nits | |
parent | Update "missing documentation" function lists (diff) | |
download | openssl-705128b0f0dbc82db9e7b90aa4103eab9a5ce10e.tar.xz openssl-705128b0f0dbc82db9e7b90aa4103eab9a5ce10e.zip |
util/find-doc-nits: more precise option and function name checker
The checks for our uses of 'B<' and 'I<' for options, and possibly
function names, was over-reaching quite a bit.
So we fine-tune it a bit:
- by only checking for options in man1 pages, and only in SYNOPSIS
and *OPTIONS sections.
- by only checking for function names in man3 pages.
The man1 option checker has the additional check that options found in
*OPTIONS are also found in SYNOPSIS andd vice versa.
In all cases, this also handles options and function names with
additional markup, such as 'B<-I<cipher>>' and 'B<sk_I<TYPE>_push>'.
Reviewed-by: Matt Caswell <matt@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/10073)
Diffstat (limited to 'util/find-doc-nits')
-rwxr-xr-x | util/find-doc-nits | 171 |
1 files changed, 154 insertions, 17 deletions
diff --git a/util/find-doc-nits b/util/find-doc-nits index 67a2ee365c..9c5826d05f 100755 --- a/util/find-doc-nits +++ b/util/find-doc-nits @@ -18,6 +18,8 @@ use Getopt::Std; use lib catdir(dirname($0), "perl"); use OpenSSL::Util::Pod; +my $debug = 0; # Set to 1 for debug output + # Options. our($opt_d); our($opt_e); @@ -203,6 +205,150 @@ sub check_head_style { } } +# Because we have options and symbols with extra markup, we need +# to take that into account, so we need a regexp that extracts +# markup chunks, including recursive markup. +# please read up on /(?R)/ in perlre(1) +# (note: order is important, (?R) needs to come before .) +# (note: non-greedy is important, or something like 'B<foo> and B<bar>' +# will be captured as one item) +my $markup_re = + qr/( # Capture group + [BIL]< # The start of what we recurse on + (?:(?-1)|.)*? # recurse the whole regexp (refering to + # the last opened capture group, i.e. the + # start of this regexp), or pick next + # character. Do NOT be greedy! + > # The end of what we recurse on + )/x; # (the x allows this sort of split up regexp) + +# Options must start with a dash, followed by a letter, possibly +# followed by letters, digits, dashes and underscores, and the last +# character must be a letter or a digit. +# We do also accept the single -? or -n, where n is a digit +my $option_re = + qr/(?: + \? # Single question mark + | + \d # Single digit + | + - # Single dash (--) + | + [[:alpha:]](?:[-_[:alnum:]]*?[[:alnum:]])? + )/x; + +# Helper function to check if a given $thing is properly marked up +# option. It returns one of these values: +# +# undef if it's not an option +# "" if it's a malformed option +# $unwrapped the option with the outermost B<> wrapping removed. +sub normalise_option { + my $id = shift; + my $filename = shift; + my $thing = shift; + + my $unwrapped = $thing; + my $unmarked = $thing; + + # $unwrapped is the option with the outer B<> markup removed + $unwrapped =~ s/^B<//; + $unwrapped =~ s/>$//; + # $unmarked is the option with *all* markup removed + $unmarked =~ s/[BIL]<|>//msg; + + + # If we found an option, check it, collect it + if ( $unwrapped =~ /^\s*-/ ) { + return $unwrapped # return option with outer B<> removed + if $unmarked =~ /^-${option_re}$/; + return ""; # Malformed option + } + return undef; # Something else +} + +# Checks of command option (man1) formatting. The man1 checks are +# restricted to the SYNOPSIS and OPTIONS sections, the rest is too +# free form, we simply cannot be too strict there. + +sub option_check { + my $id = shift; + my $filename = shift; + my $contents = shift; + + my $synopsis = ($contents =~ /=head1\s+SYNOPSIS(.*?)=head1/s, $1); + + # Some pages have more than one OPTIONS section, let's make sure + # to get them all + my $options = ''; + while ( $contents =~ /=head1\s+[A-Z ]*?OPTIONS$(.*?)(?==head1)/msg ) { + $options .= $1; + } + + # Look for options with no or incorrect markup + while ( $synopsis =~ + /(?<![-<[:alnum:]])-(?:$markup_re|.)*(?![->[:alnum:]])/msg ) { + err($id, "Malformed option [1] in SYNOPSIS: $&"); + } + + while ( $synopsis =~ /$markup_re/msg ) { + my $found = $&; + print STDERR "$id:DEBUG[option_check] SYNOPSIS: found $found\n" + if $debug; + my $option_uw = normalise_option($id, $filename, $found); + err($id, "Malformed option [2] in SYNOPSIS: $found") + if defined $option_uw && $option_uw eq ''; + } + + # In OPTIONS, we look for =item paragraphs. + # (?=^\s*$) detects an empty line. + while ( $options =~ /=item\s+(.*?)(?=^\s*$)/msg ) { + my $item = $&; + + while ( $item =~ /(\[\s*)?($markup_re)/msg ) { + my $found = $2; + print STDERR "$id:DEBUG[option_check] OPTIONS: found $&\n" + if $debug; + err($id, "Unexpected bracket in OPTIONS =item: $item") + if ($1 // '') ne '' && $found =~ /^B<\s*-/; + + my $option_uw = normalise_option($id, $filename, $found); + err($id, "Malformed option in OPTIONS: $found") + if defined $option_uw && $option_uw eq ''; + } + } +} + +# Normal symbol form +my $symbol_re = qr/[[:alpha:]_][_[:alnum:]]*?/; + +# Checks of function name (man3) formatting. The man3 checks are +# easier than the man1 checks, we only check the names followed by (), +# and only the names that have POD markup. + +sub functionname_check { + my $id = shift; + my $filename = shift; + my $contents = shift; + + while ( $contents =~ /($markup_re)\(\)/msg ) { + print STDERR "$id:DEBUG[functionname_check] SYNOPSIS: found $&\n" + if $debug; + + my $symbol = $1; + my $unmarked = $symbol; + $unmarked =~ s/[BIL]<|>//msg; + + err($id, "Malformed symbol: $symbol") + unless $symbol =~ /^B<.*>$/ && $unmarked =~ /^${symbol_re}$/ + } + + # We can't do the kind of collecting coolness that option_check() + # does, because there are too many things that can't be found in + # name repositories like the NAME sections, such as symbol names + # with a variable part (typically marked up as B<foo_I<TYPE>_bar> +} + sub check { my $filename = shift; my $dirname = basename(dirname($filename)); @@ -225,9 +371,14 @@ sub check { check_section_location($id, $contents, "EXAMPLES", "SEE ALSO"); } - name_synopsis($id, $filename, $contents) - unless $contents =~ /=for comment generic/ - or $filename =~ m@man[157]/@; + unless ( $contents =~ /=for comment generic/ ) { + if ( $filename =~ m|man3/| ) { + name_synopsis($id, $filename, $contents); + functionname_check($id, $filename, $contents); + } elsif ( $filename =~ m|man1/| ) { + option_check($id, $filename, $contents) + } + } err($id, "doesn't start with =pod") if $contents !~ /^=pod/; @@ -255,20 +406,6 @@ sub check { if $contents =~ /=over([^ ][^24])/; err($id, "Possible version style issue") if $contents =~ /OpenSSL version [019]/; - err($id, "Brackets on item line") - if $contents =~ /=item \[/; - if ( $contents !~ /=for comment generic/) { - # Some API pages have B<foo<I<TYPE>bar>. - err($id, "Bad flag formatting inside B<>") - if $contents =~ /B<-[A-Za-z_ ]+ /; - while ( $contents =~ /([BI])<([^>]*)>/g ) { - my $B = $1; - my $T = $2; - next if $T =~ /E</; # Assume it's E<lt> - err($id, "Bad content inside $B<$T>") - if $T =~ /[<|]/; - } - } if ( $contents !~ /=for comment multiple includes/ ) { # Look for multiple consecutive openssl #include lines |