summaryrefslogtreecommitdiffstats
path: root/util/find-doc-nits
diff options
context:
space:
mode:
authorRichard Levitte <levitte@openssl.org>2019-10-02 13:16:48 +0200
committerRichard Levitte <levitte@openssl.org>2019-10-02 19:26:24 +0200
commit705128b0f0dbc82db9e7b90aa4103eab9a5ce10e (patch)
tree06879b05c7f2f1369a48f1fd19166a800b7a94a4 /util/find-doc-nits
parentUpdate "missing documentation" function lists (diff)
downloadopenssl-705128b0f0dbc82db9e7b90aa4103eab9a5ce10e.tar.xz
openssl-705128b0f0dbc82db9e7b90aa4103eab9a5ce10e.zip
util/find-doc-nits: more precise option and function name checker
The checks for our uses of 'B<' and 'I<' for options, and possibly function names, was over-reaching quite a bit. So we fine-tune it a bit: - by only checking for options in man1 pages, and only in SYNOPSIS and *OPTIONS sections. - by only checking for function names in man3 pages. The man1 option checker has the additional check that options found in *OPTIONS are also found in SYNOPSIS andd vice versa. In all cases, this also handles options and function names with additional markup, such as 'B<-I<cipher>>' and 'B<sk_I<TYPE>_push>'. Reviewed-by: Matt Caswell <matt@openssl.org> (Merged from https://github.com/openssl/openssl/pull/10073)
Diffstat (limited to 'util/find-doc-nits')
-rwxr-xr-xutil/find-doc-nits171
1 files changed, 154 insertions, 17 deletions
diff --git a/util/find-doc-nits b/util/find-doc-nits
index 67a2ee365c..9c5826d05f 100755
--- a/util/find-doc-nits
+++ b/util/find-doc-nits
@@ -18,6 +18,8 @@ use Getopt::Std;
use lib catdir(dirname($0), "perl");
use OpenSSL::Util::Pod;
+my $debug = 0; # Set to 1 for debug output
+
# Options.
our($opt_d);
our($opt_e);
@@ -203,6 +205,150 @@ sub check_head_style {
}
}
+# Because we have options and symbols with extra markup, we need
+# to take that into account, so we need a regexp that extracts
+# markup chunks, including recursive markup.
+# please read up on /(?R)/ in perlre(1)
+# (note: order is important, (?R) needs to come before .)
+# (note: non-greedy is important, or something like 'B<foo> and B<bar>'
+# will be captured as one item)
+my $markup_re =
+ qr/( # Capture group
+ [BIL]< # The start of what we recurse on
+ (?:(?-1)|.)*? # recurse the whole regexp (refering to
+ # the last opened capture group, i.e. the
+ # start of this regexp), or pick next
+ # character. Do NOT be greedy!
+ > # The end of what we recurse on
+ )/x; # (the x allows this sort of split up regexp)
+
+# Options must start with a dash, followed by a letter, possibly
+# followed by letters, digits, dashes and underscores, and the last
+# character must be a letter or a digit.
+# We do also accept the single -? or -n, where n is a digit
+my $option_re =
+ qr/(?:
+ \? # Single question mark
+ |
+ \d # Single digit
+ |
+ - # Single dash (--)
+ |
+ [[:alpha:]](?:[-_[:alnum:]]*?[[:alnum:]])?
+ )/x;
+
+# Helper function to check if a given $thing is properly marked up
+# option. It returns one of these values:
+#
+# undef if it's not an option
+# "" if it's a malformed option
+# $unwrapped the option with the outermost B<> wrapping removed.
+sub normalise_option {
+ my $id = shift;
+ my $filename = shift;
+ my $thing = shift;
+
+ my $unwrapped = $thing;
+ my $unmarked = $thing;
+
+ # $unwrapped is the option with the outer B<> markup removed
+ $unwrapped =~ s/^B<//;
+ $unwrapped =~ s/>$//;
+ # $unmarked is the option with *all* markup removed
+ $unmarked =~ s/[BIL]<|>//msg;
+
+
+ # If we found an option, check it, collect it
+ if ( $unwrapped =~ /^\s*-/ ) {
+ return $unwrapped # return option with outer B<> removed
+ if $unmarked =~ /^-${option_re}$/;
+ return ""; # Malformed option
+ }
+ return undef; # Something else
+}
+
+# Checks of command option (man1) formatting. The man1 checks are
+# restricted to the SYNOPSIS and OPTIONS sections, the rest is too
+# free form, we simply cannot be too strict there.
+
+sub option_check {
+ my $id = shift;
+ my $filename = shift;
+ my $contents = shift;
+
+ my $synopsis = ($contents =~ /=head1\s+SYNOPSIS(.*?)=head1/s, $1);
+
+ # Some pages have more than one OPTIONS section, let's make sure
+ # to get them all
+ my $options = '';
+ while ( $contents =~ /=head1\s+[A-Z ]*?OPTIONS$(.*?)(?==head1)/msg ) {
+ $options .= $1;
+ }
+
+ # Look for options with no or incorrect markup
+ while ( $synopsis =~
+ /(?<![-<[:alnum:]])-(?:$markup_re|.)*(?![->[:alnum:]])/msg ) {
+ err($id, "Malformed option [1] in SYNOPSIS: $&");
+ }
+
+ while ( $synopsis =~ /$markup_re/msg ) {
+ my $found = $&;
+ print STDERR "$id:DEBUG[option_check] SYNOPSIS: found $found\n"
+ if $debug;
+ my $option_uw = normalise_option($id, $filename, $found);
+ err($id, "Malformed option [2] in SYNOPSIS: $found")
+ if defined $option_uw && $option_uw eq '';
+ }
+
+ # In OPTIONS, we look for =item paragraphs.
+ # (?=^\s*$) detects an empty line.
+ while ( $options =~ /=item\s+(.*?)(?=^\s*$)/msg ) {
+ my $item = $&;
+
+ while ( $item =~ /(\[\s*)?($markup_re)/msg ) {
+ my $found = $2;
+ print STDERR "$id:DEBUG[option_check] OPTIONS: found $&\n"
+ if $debug;
+ err($id, "Unexpected bracket in OPTIONS =item: $item")
+ if ($1 // '') ne '' && $found =~ /^B<\s*-/;
+
+ my $option_uw = normalise_option($id, $filename, $found);
+ err($id, "Malformed option in OPTIONS: $found")
+ if defined $option_uw && $option_uw eq '';
+ }
+ }
+}
+
+# Normal symbol form
+my $symbol_re = qr/[[:alpha:]_][_[:alnum:]]*?/;
+
+# Checks of function name (man3) formatting. The man3 checks are
+# easier than the man1 checks, we only check the names followed by (),
+# and only the names that have POD markup.
+
+sub functionname_check {
+ my $id = shift;
+ my $filename = shift;
+ my $contents = shift;
+
+ while ( $contents =~ /($markup_re)\(\)/msg ) {
+ print STDERR "$id:DEBUG[functionname_check] SYNOPSIS: found $&\n"
+ if $debug;
+
+ my $symbol = $1;
+ my $unmarked = $symbol;
+ $unmarked =~ s/[BIL]<|>//msg;
+
+ err($id, "Malformed symbol: $symbol")
+ unless $symbol =~ /^B<.*>$/ && $unmarked =~ /^${symbol_re}$/
+ }
+
+ # We can't do the kind of collecting coolness that option_check()
+ # does, because there are too many things that can't be found in
+ # name repositories like the NAME sections, such as symbol names
+ # with a variable part (typically marked up as B<foo_I<TYPE>_bar>
+}
+
sub check {
my $filename = shift;
my $dirname = basename(dirname($filename));
@@ -225,9 +371,14 @@ sub check {
check_section_location($id, $contents, "EXAMPLES", "SEE ALSO");
}
- name_synopsis($id, $filename, $contents)
- unless $contents =~ /=for comment generic/
- or $filename =~ m@man[157]/@;
+ unless ( $contents =~ /=for comment generic/ ) {
+ if ( $filename =~ m|man3/| ) {
+ name_synopsis($id, $filename, $contents);
+ functionname_check($id, $filename, $contents);
+ } elsif ( $filename =~ m|man1/| ) {
+ option_check($id, $filename, $contents)
+ }
+ }
err($id, "doesn't start with =pod")
if $contents !~ /^=pod/;
@@ -255,20 +406,6 @@ sub check {
if $contents =~ /=over([^ ][^24])/;
err($id, "Possible version style issue")
if $contents =~ /OpenSSL version [019]/;
- err($id, "Brackets on item line")
- if $contents =~ /=item \[/;
- if ( $contents !~ /=for comment generic/) {
- # Some API pages have B<foo<I<TYPE>bar>.
- err($id, "Bad flag formatting inside B<>")
- if $contents =~ /B<-[A-Za-z_ ]+ /;
- while ( $contents =~ /([BI])<([^>]*)>/g ) {
- my $B = $1;
- my $T = $2;
- next if $T =~ /E</; # Assume it's E<lt>
- err($id, "Bad content inside $B<$T>")
- if $T =~ /[<|]/;
- }
- }
if ( $contents !~ /=for comment multiple includes/ ) {
# Look for multiple consecutive openssl #include lines