Rework util/find-doc-nits to distinguish internal documentation

We didn't really distinguish internal and public documentation, or
matched that with the state of the documented symbols.  we therefore
needed to rework the logic to account for the state of each symbol.

To simplify things, and make them consistent, we load all of
util/*.num, util/*.syms and util/missing*.txt unconditionally.

Also, we rework the reading of the manuals to happen only once (or
well, not quite, Pod::Checker reads from file too, but at the very
least, our script isn't reading the same file multiple times).

Reviewed-by: Matt Caswell <matt@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/11476)
This commit is contained in:
Richard Levitte 2020-04-06 13:51:36 +02:00
parent eacd30a703
commit 8270c4791d
2 changed files with 138 additions and 108 deletions

View File

@ -79,7 +79,6 @@ die "Need one of -[cdehlnouv] flags.\n"
my $temp = '/tmp/docnits.txt'; my $temp = '/tmp/docnits.txt';
my $OUT; my $OUT;
my %public;
my $status = 0; my $status = 0;
my @sections = ( 'man1', 'man3', 'man5', 'man7' ); my @sections = ( 'man1', 'man3', 'man5', 'man7' );
@ -89,7 +88,17 @@ my %mandatory_sections = (
3 => [ 'SYNOPSIS', 'RETURN VALUES' ], 3 => [ 'SYNOPSIS', 'RETURN VALUES' ],
5 => [ ], 5 => [ ],
7 => [ ] 7 => [ ]
); );
# Symbols that we ignored.
# They are internal macros that we currently don't document
my $ignored = qr/(?| ^i2d_
| ^d2i_
| ^DEPRECATEDIN
| \Q_fnsig(3)\E$
| ^IMPLEMENT_
| ^_?DECLARE_
)/x;
# Collect all POD files, both internal and public, and regardless of location # Collect all POD files, both internal and public, and regardless of location
# We collect them in a hash table with each file being a key, so we can attach # We collect them in a hash table with each file being a key, so we can attach
@ -298,12 +307,6 @@ sub name_synopsis {
if %foundfilenames; if %foundfilenames;
err($id, "$simplename (filename) missing from NAME section") err($id, "$simplename (filename) missing from NAME section")
unless $foundfilename; unless $foundfilename;
if ( $filename !~ /internal/ ) {
foreach my $n ( keys %names ) {
err($id, "$n is not public")
if !defined $public{$n};
}
}
# Find all functions in SYNOPSIS # Find all functions in SYNOPSIS
return unless $contents =~ /=head1 SYNOPSIS(.*)=head1 DESCRIPTION/ms; return unless $contents =~ /=head1 SYNOPSIS(.*)=head1 DESCRIPTION/ms;
@ -537,7 +540,7 @@ sub functionname_check {
$unmarked =~ s/[BIL]<|>//msg; $unmarked =~ s/[BIL]<|>//msg;
err($id, "Malformed symbol: $symbol") err($id, "Malformed symbol: $symbol")
unless $symbol =~ /^B<.*>$/ && $unmarked =~ /^${symbol_re}$/ unless $symbol =~ /^B<.*?>$/ && $unmarked =~ /^${symbol_re}$/
} }
# We can't do the kind of collecting coolness that option_check() # We can't do the kind of collecting coolness that option_check()
@ -612,16 +615,10 @@ sub wording {
# Perform all sorts of nit/error checks on a manpage # Perform all sorts of nit/error checks on a manpage
sub check { sub check {
my $filename = shift; my %podinfo = @_;
my $filename = $podinfo{filename};
my $dirname = basename(dirname($filename)); my $dirname = basename(dirname($filename));
my $contents = $podinfo{contents};
my $contents = '';
{
local $/ = undef;
open POD, $filename or die "Couldn't open $filename, $!";
$contents = <POD>;
close POD;
}
my $id = "${filename}:1:"; my $id = "${filename}:1:";
check_head_style($id, $contents); check_head_style($id, $contents);
@ -663,7 +660,7 @@ sub check {
unless $target =~ /^[_[:alpha:]][_[:alnum:]]*$/ unless $target =~ /^[_[:alpha:]][_[:alnum:]]*$/
} }
unless ( $contents =~ /=for openssl generic/ ) { unless ( $contents =~ /^=for openssl generic/ms ) {
if ( $filename =~ m|man3/| ) { if ( $filename =~ m|man3/| ) {
name_synopsis($id, $filename, $contents); name_synopsis($id, $filename, $contents);
functionname_check($id, $filename, $contents); functionname_check($id, $filename, $contents);
@ -740,10 +737,38 @@ sub check {
} }
} }
# Information database ###############################################
# Map of links in each POD file; filename => [ "foo(1)", "bar(3)", ... ]
my %link_map = ();
# Map of names in each POD file or from "missing" files; possible values are:
# If found in a POD files, "name(s)" => filename
# If found in a "missing" file or external, "name(s)" => ''
my %name_map = ();
# State of man-page names.
# %state is affected by loading util/*.num and util/*.syms
# Values may be one of:
# 'crypto' : belongs in libcrypto (loaded from libcrypto.num)
# 'ssl' : belongs in libssl (loaded from libssl.num)
# 'other' : belongs in libcrypto or libssl (loaded from other.syms)
# 'internal' : Internal
# 'public' : Public (generic name or external documentation)
# Any of these values except 'public' may be prefixed with 'missing_'
# to indicate that they are known to be missing.
my %state;
# %missing is affected by loading util/missing*.txt. Values may be one of:
# 'crypto' : belongs in libcrypto (loaded from libcrypto.num)
# 'ssl' : belongs in libssl (loaded from libssl.num)
# 'other' : belongs in libcrypto or libssl (loaded from other.syms)
# 'internal' : Internal
my %missing;
# Parse libcrypto.num, etc., and return sorted list of what's there. # Parse libcrypto.num, etc., and return sorted list of what's there.
sub parsenum { sub loadnum ($;$) {
my $file = shift; my $file = shift;
my @apis; my $type = shift;
my @symbols;
open my $IN, '<', catfile($config{sourcedir}, $file) open my $IN, '<', catfile($config{sourcedir}, $file)
or die "Can't open $file, $!, stopped"; or die "Can't open $file, $!, stopped";
@ -754,42 +779,52 @@ sub parsenum {
my @fields = split(); my @fields = split();
die "Malformed line $. in $file: $_" die "Malformed line $. in $file: $_"
if scalar @fields != 2 && scalar @fields != 4; if scalar @fields != 2 && scalar @fields != 4;
push @apis, $fields[0]; $state{$fields[0].'(3)'} = $type // 'internal';
} }
close $IN; close $IN;
return sort @apis;
} }
# Parse all the manpages, getting return map of what they document
# (by looking at their NAME sections).
# Map of links in each POD file; filename => [ "foo(1)", "bar(3)", ... ]
my %link_map = ();
# Map of names in each POD file; "name(s)" => filename
my %name_map = ();
# Load file of symbol names that we know aren't documented. # Load file of symbol names that we know aren't documented.
sub loadmissing($) sub loadmissing($;$)
{ {
my $missingfile = shift; my $missingfile = shift;
my @missing; my $type = shift;
open FH, catfile($config{sourcedir}, $missingfile) open FH, catfile($config{sourcedir}, $missingfile)
or die "Can't open $missingfile"; or die "Can't open $missingfile";
while ( <FH> ) { while ( <FH> ) {
chomp; chomp;
next if /^#/; next if /^#/;
push @missing, $_; $missing{$_} = $type // 'internal';
} }
close FH; close FH;
}
for (@missing) { # Check that we have consistent public / internal documentation and declaration
err("$missingfile:", "$_ is documented in $name_map{$_}") sub checkstate () {
if !$opt_o && exists $name_map{$_} && defined $name_map{$_}; # Collect all known names, no matter where they come from
my %names = map { $_ => 1 } (keys %name_map, keys %state, keys %missing);
# Check section 3, i.e. functions and macros
foreach ( grep { $_ =~ /\(3\)$/ } sort keys %names ) {
next if ( $name_map{$_} // '') eq '' || $_ =~ /$ignored/;
# If a man-page isn't recorded public or if it's recorded missing
# and internal, it's declared to be internal.
my $declared_internal =
($state{$_} // 'internal') eq 'internal'
|| ($missing{$_} // '') eq 'internal';
# If a man-page isn't recorded internal or if it's recorded missing
# and not internal, it's declared to be public
my $declared_public =
($state{$_} // 'internal') ne 'internal'
|| ($missing{$_} // 'internal') ne 'internal';
err("$_ is supposedly public but is documented as internal")
if ( $declared_public && $name_map{$_} =~ /\/internal\// );
err("$_ is supposedly internal but is documented as public")
if ( $declared_internal && $name_map{$_} !~ /\/internal\// );
} }
return @missing;
} }
# Check for undocumented macros; ignore those in the "missing" file # Check for undocumented macros; ignore those in the "missing" file
@ -797,13 +832,6 @@ sub loadmissing($)
sub checkmacros { sub checkmacros {
my $count = 0; my $count = 0;
my %seen; my %seen;
my @missing;
if ( $opt_o ) {
@missing = loadmissing('util/missingmacro111.txt');
} elsif ( $opt_v ) {
@missing = loadmissing('util/missingmacro.txt');
}
foreach my $f ( files(TAGS => 'public_header') ) { foreach my $f ( files(TAGS => 'public_header') ) {
# Skip some internals we don't want to document yet. # Skip some internals we don't want to document yet.
@ -816,16 +844,10 @@ sub checkmacros {
while ( <IN> ) { while ( <IN> ) {
next unless /^#\s*define\s*(\S+)\(/; next unless /^#\s*define\s*(\S+)\(/;
my $macro = "$1(3)"; # We know they're all in section 3 my $macro = "$1(3)"; # We know they're all in section 3
next if exists $name_map{$macro} || defined $seen{$macro}; next if defined $name_map{$macro}
next if $macro =~ /^i2d_/ || defined $missing{$macro}
|| $macro =~ /^d2i_/ || defined $seen{$macro}
|| $macro =~ /^DEPRECATEDIN/ || $macro =~ /$ignored/;
|| $macro =~ /\Q_fnsig(3)\E$/
|| $macro =~ /^IMPLEMENT_/
|| $macro =~ /^_?DECLARE_/;
# Skip macros known to be missing
next if $opt_v && grep( /^\Q$macro\E$/, @missing);
err("$f:", "macro $macro undocumented") err("$f:", "macro $macro undocumented")
if $opt_d || $opt_e; if $opt_d || $opt_e;
@ -840,39 +862,38 @@ sub checkmacros {
# Find out what is undocumented (filtering out the known missing ones) # Find out what is undocumented (filtering out the known missing ones)
# and display them. # and display them.
sub printem { sub printem ($) {
my $libname = shift; my $type = shift;
my $numfile = shift;
my $missingfile = shift;
my $count = 0; my $count = 0;
my %seen; my %seen;
my @missing = loadmissing($missingfile) if $opt_v; foreach my $func ( grep { $_ eq $type } sort keys %state ) {
foreach my $func ( parsenum($numfile) ) {
$func .= '(3)'; # We know they're all in section 3 $func .= '(3)'; # We know they're all in section 3
next if exists $name_map{$func} || defined $seen{$func};
# Skip functions known to be missing. # Skip functions known to be missing
next if $opt_v && grep( /^\Q$func\E$/, @missing); next if $opt_v && defined $name_map{$func} && $name_map{$func} eq '';
err("$libname:", "function $func undocumented") # Skip known names
next if defined $name_map{$func} || defined $seen{$func};
err("$type:", "function $func undocumented")
if $opt_d || $opt_e; if $opt_d || $opt_e;
$count++; $count++;
$seen{$func} = 1; $seen{$func} = 1;
} }
err("# $count in $numfile are not documented") err("# $count lib$type names are not documented")
if $count > 0; if $count > 0;
} }
# Collect all the names in a manpage. # Collect all the names in a manpage.
sub collectnames { sub collectnames {
my $filename = shift; my %podinfo = @_;
my $filename = $podinfo{filename};
$filename =~ m|man(\d)/|; $filename =~ m|man(\d)/|;
my $section = $1; my $section = $1;
my $simplename = basename($filename, ".pod"); my $simplename = basename($filename, ".pod");
my $id = "${filename}:1:"; my $id = "${filename}:1:";
my %podinfo = extract_pod_info($filename, { debug => $debug }); my $is_generic = $podinfo{contents} =~ /^=for openssl generic/ms;
unless ( grep { $simplename eq $_ } @{$podinfo{names}} ) { unless ( grep { $simplename eq $_ } @{$podinfo{names}} ) {
err($id, "$simplename not in NAME section"); err($id, "$simplename not in NAME section");
@ -883,12 +904,15 @@ sub collectnames {
err($id, "'$name' contains white space") err($id, "'$name' contains white space")
if $name =~ /\s/; if $name =~ /\s/;
my $name_sec = "$name($section)"; my $name_sec = "$name($section)";
if ( !exists $name_map{$name_sec} ) { if ( !defined $name_map{$name_sec} ) {
$name_map{$name_sec} = $filename; $name_map{$name_sec} = $filename;
$state{$name_sec} =
( $filename =~ /\/internal\// ? 'internal' : 'public' )
if $is_generic;
} elsif ( $filename eq $name_map{$name_sec} ) { } elsif ( $filename eq $name_map{$name_sec} ) {
err($id, "$name_sec duplicated in NAME section of", err($id, "$name_sec duplicated in NAME section of",
$name_map{$name_sec}); $name_map{$name_sec});
} else { } elsif ( $name_map{$name_sec} ne '' ) {
err($id, "$name_sec also in NAME section of", err($id, "$name_sec also in NAME section of",
$name_map{$name_sec}); $name_map{$name_sec});
} }
@ -896,7 +920,8 @@ sub collectnames {
if ( $podinfo{contents} =~ /=for openssl foreign manual (.*)\n/ ) { if ( $podinfo{contents} =~ /=for openssl foreign manual (.*)\n/ ) {
foreach my $f ( split / /, $1 ) { foreach my $f ( split / /, $1 ) {
$name_map{$f} = undef; # It still exists! $name_map{$f} = ''; # It still exists!
$state{$f} = 'public'; # We assume!
} }
} }
@ -918,24 +943,15 @@ sub checklinks {
foreach my $filename ( sort keys %link_map ) { foreach my $filename ( sort keys %link_map ) {
foreach my $link ( @{$link_map{$filename}} ) { foreach my $link ( @{$link_map{$filename}} ) {
err("${filename}:1:", "reference to non-existing $link") err("${filename}:1:", "reference to non-existing $link")
unless exists $name_map{$link}; unless defined $name_map{$link} || defined $missing{$link};
err("${filename}:1:", "reference of internal $link in public documentation $filename")
if ( ( ($state{$link} // '') eq 'internal'
|| ($missing{$link} // '') eq 'internal' )
&& $filename !~ /\/internal\// );
} }
} }
} }
# Load the public symbol/macro names
sub publicize {
foreach my $name ( parsenum('util/libcrypto.num') ) {
$public{$name} = 1;
}
foreach my $name ( parsenum('util/libssl.num') ) {
$public{$name} = 1;
}
foreach my $name ( parsenum('util/other.syms') ) {
$public{$name} = 1;
}
}
# Cipher/digests to skip if they show up as "not implemented" # Cipher/digests to skip if they show up as "not implemented"
# because they are, via the "-*" construct. # because they are, via the "-*" construct.
my %skips = ( my %skips = (
@ -1062,26 +1078,42 @@ if ( $opt_c ) {
exit $status; exit $status;
} }
# Preparation for some options, populate %name_map and %link_map # Populate %state
if ( $opt_l || $opt_u || $opt_v ) { loadnum('util/libcrypto.num', 'crypto');
foreach ( files(TAGS => 'manual') ) { loadnum('util/libssl.num', 'ssl');
collectnames($_); loadnum('util/other.syms', 'other');
loadnum('util/other-internal.syms');
if ( $opt_o ) {
loadmissing('util/missingmacro111.txt', 'crypto');
loadmissing('util/missingcrypto111.txt', 'crypto');
loadmissing('util/missingssl111.txt', 'ssl');
} else {
loadmissing('util/missingmacro.txt', 'crypto');
loadmissing('util/missingcrypto.txt', 'crypto');
loadmissing('util/missingssl.txt', 'ssl');
loadmissing('util/missingcrypto-internal.txt');
loadmissing('util/missingssl-internal.txt');
}
if ( $opt_n || $opt_l || $opt_u || $opt_v ) {
my @files_to_read = ( $opt_n && @ARGV ) ? @ARGV : files(TAGS => 'manual');
foreach (@files_to_read) {
my %podinfo = extract_pod_info($_, { debug => $debug });
collectnames(%podinfo)
if ( $opt_l || $opt_u || $opt_v );
check(%podinfo)
if ( $opt_n );
} }
} }
if ( $opt_l ) { if ( $opt_l ) {
foreach my $func ( loadmissing("util/missingcrypto.txt") ) {
$name_map{$func} = undef;
}
checklinks(); checklinks();
} }
if ( $opt_n ) { if ( $opt_n ) {
publicize();
foreach ( @ARGV ? @ARGV : files(TAGS => 'manual') ) {
check($_);
}
# If not given args, check that all man1 commands are named properly. # If not given args, check that all man1 commands are named properly.
if ( scalar @ARGV == 0 ) { if ( scalar @ARGV == 0 ) {
foreach ( files(TAGS => [ 'public_manual', 'man1' ]) ) { foreach ( files(TAGS => [ 'public_manual', 'man1' ]) ) {
@ -1091,14 +1123,11 @@ if ( $opt_n ) {
} }
} }
checkstate();
if ( $opt_u || $opt_v) { if ( $opt_u || $opt_v) {
if ( $opt_o ) { printem('crypto');
printem('crypto', 'util/libcrypto.num', 'util/missingcrypto111.txt'); printem('ssl');
printem('ssl', 'util/libssl.num', 'util/missingssl111.txt');
} else {
printem('crypto', 'util/libcrypto.num', 'util/missingcrypto.txt');
printem('ssl', 'util/libssl.num', 'util/missingssl.txt');
}
checkmacros(); checkmacros();
} }

View File

@ -185,7 +185,8 @@ sub extract_pod_info {
return ( section => $podinfo{section}, return ( section => $podinfo{section},
names => [ @names, @invisible_names ], names => [ @names, @invisible_names ],
contents => $contents ); contents => $contents,
filename => $filename );
} }
1; 1;