#! /usr/bin/env perl
# Copyright 2002-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


require 5.10.0;
use warnings;
use strict;

use Carp qw(:DEFAULT cluck);
use Pod::Checker;
use File::Find;
use File::Basename;
use File::Spec::Functions;
use Getopt::Std;
use FindBin;
use lib "$FindBin::Bin/perl";

use OpenSSL::Util::Pod;

use lib '.';
use configdata;

# Diagnostic switch: set to 1 to get debug output
my $debug = 0;

# Command line switches; Getopt::Std stores each recognised flag in the
# package variable named after it.
our ($opt_d, $opt_e, $opt_s, $opt_o, $opt_h, $opt_l,
     $opt_m, $opt_n, $opt_p, $opt_u, $opt_v, $opt_c);

# Write the usage summary to standard output and terminate the program.
sub help {
    my $usage = <<EOF;
Find small errors (nits) in documentation. Options:
    -c List undocumented commands, undocumented options and unimplemented options.
    -d Detailed list of undocumented (implies -u)
    -e Detailed list of new undocumented (implies -v)
    -h Print this help message
    -l Print bogus links
    -m Name(s) of manuals to focus on. Default: man1,man3,man5,man7
    -n Print nits in POD pages
    -o Causes -e/-v to count symbols added since 1.1.1 as new (implies -v)
    -u Count undocumented functions
    -v Count new undocumented functions
EOF
    print $usage;
    exit;
}

getopts('cdehlm:nouv');

if ($opt_h) {
    help();
}

# The "detailed" switches turn on the counting switch they refine.
if ($opt_d) {
    $opt_u = 1;
}
if ($opt_o || $opt_e) {
    $opt_v = 1;
}

# The two counting modes, and their detailed variants, exclude each other.
if ($opt_u && $opt_v) {
    die "Cannot use both -u and -v";
}
if ($opt_d && $opt_e) {
    die "Cannot use both -d and -e";
}

# We only need to check c, l, n, u and v.
# Options d, e, o imply one of the above.
die "Need one of -[cdehlnouv] flags.\n"
    unless $opt_c or $opt_l or $opt_n or $opt_u or $opt_v;


# File::Spec is already loaded by File::Spec::Functions; it is named here
# because tmpdir() is called as a class method below.
use File::Spec;

# Scratch file that check() hands to podchecker and then reads back.
# It is placed in the platform's temporary directory and carries the process
# ID in its name, so the script does not depend on a Unix-style "/tmp" and
# two runs at the same time do not overwrite each other's output.
my $temp = catfile(File::Spec->tmpdir(), "docnits.$$.txt");
my $OUT;
# Set to 1 by err() as soon as any problem has been reported.
my $status = 0;

# Manual sections to look at; -m takes them separated by commas or spaces.
$opt_m = "man1,man3,man5,man7" unless $opt_m;
die "Argument of -m option may contain only man1, man3, man5, and/or man7"
    unless $opt_m =~ /^(man[1357][, ]?)*$/;
my @sections = ( split /[, ]/, $opt_m );

# =head1 sections every page must have ('*'), plus the additional ones
# required per manual section number.
my %mandatory_sections = (
    '*' => [ 'NAME', 'DESCRIPTION', 'COPYRIGHT' ],
    1   => [ 'SYNOPSIS', 'OPTIONS' ],
    3   => [ 'SYNOPSIS', 'RETURN VALUES' ],
    5   => [ ],
    7   => [ ]
    );

# Symbols that we ignore.
# They are reserved macros that we currently don't document
my $ignored = qr/(?| ^i2d_
                 |   ^d2i_
                 |   ^DEPRECATEDIN
                 |   ^OSSL_DEPRECATED
                 |   \Q_fnsig(3)\E$
                 |   ^IMPLEMENT_
                 |   ^_?DECLARE_
                 |   ^sk_
                 |   ^SKM_DEFINE_STACK_OF_INTERNAL
                 |   ^lh_
                 |   ^DEFINE_LHASH_OF_INTERNAL
                 )/x;

# A common regexp for C symbol names
my $C_symbol = qr/\b[[:alpha:]][_[:alnum:]]*\b/;

# Collect all POD files, both internal and public, and regardless of location
# We collect them in a hash table with each file being a key, so we can attach
# tags to them.  For example, internal docs will have the word "internal"
# attached to them.
my %files = ();
# We collect file names on the fly, on known tag basis
my %collected_tags = ();
# We cache results based on tags
my %collected_results = ();

# files OPTIONS
#
# Example:
#
#       files(TAGS => 'manual');
#       files(TAGS => [ 'manual', 'man1' ]);
#
# This function returns an array of files corresponding to a set of tags
# given with the options "TAGS".  The value of this option can be a single
# word, or an array of several words, which work as inclusive or exclusive
# selectors.  Inclusive selectors are used to add one more set of files to
# the returned array, while exclusive selectors limit the set of files added
# to the array.
# The recognised tag values are:
#
# 'public_manual'       - inclusive selector, adds public manuals to the
#                         returned array of files.
# 'internal_manual'     - inclusive selector, adds internal manuals to the
#                         returned array of files.
# 'manual'              - inclusive selector, adds any manual to the returned
#                         array of files.  This is really a shorthand for
#                         'public_manual' and 'internal_manual' combined.
# 'public_header'       - inclusive selector, adds public headers to the
#                         returned array of files.
# 'header'              - inclusive selector, adds any header file to the
#                         returned array of files.  Since we currently only
#                         care about public headers, this is exactly
#                         equivalent to 'public_header', but is present for
#                         consistency.
#
# 'man1', 'man3', 'man5', 'man7'
#                       - exclusive selectors, only applicable together with
#                         any of the manual selectors.  If any of these are
#                         present, only the manuals from the given sections
#                         will be included.  If none of these are present,
#                         the manuals from all sections will be returned.
#
# All returned manual files come from configdata.pm.
# All returned header files come from looking inside
# "$config{sourcedir}/include/openssl"
#
sub files {
    my %opts = ( @_ );          # Make a copy of the arguments

    # Reject a missing TAGS option before the scalar-to-array promotion
    # below; otherwise an undefined value would be wrapped in an array and
    # slip through the sanity check.
    croak "No tags given, or not an array"
        unless defined $opts{TAGS};

    $opts{TAGS} = [ $opts{TAGS} ] if ref($opts{TAGS}) eq '';

    croak "No tags given, or not an array"
        unless ref($opts{TAGS}) eq 'ARRAY';

    # Expand the shorthand selectors ('manual', 'header', 'public',
    # 'internal') into the concrete ones, then drop the shorthands.
    my %tags = map { $_ => 1 } @{$opts{TAGS}};
    $tags{public_manual} = 1
        if $tags{manual} && ($tags{public} // !$tags{internal});
    $tags{internal_manual} = 1
        if $tags{manual} && ($tags{internal} // !$tags{public});
    $tags{public_header} = 1
        if $tags{header} && ($tags{public} // !$tags{internal});
    delete $tags{manual};
    delete $tags{header};
    delete $tags{public};
    delete $tags{internal};

    # Key under which the result for this tag combination is cached
    my $tags_as_key = join(':', sort keys %tags);

    cluck  "DEBUG[files]: This is how we got here!" if $debug;
    print STDERR "DEBUG[files]: tags: $tags_as_key\n" if $debug;

    # Only tags that have not been collected by an earlier call need work
    my %tags_to_collect = ( map { $_ => 1 }
                            grep { !exists $collected_tags{$_} }
                            keys %tags );

    if ($tags_to_collect{public_manual}) {
        print STDERR "DEBUG[files]: collecting public manuals\n"
            if $debug;

        # The structure in configdata.pm is that $unified_info{mandocs}
        # contains lists of man files, and in turn, $unified_info{depends}
        # contains hash tables showing which POD file each of those man
        # files depend on.  We use that information to find the POD files,
        # and to attach the man section they belong to as tags
        foreach my $mansect ( @sections ) {
            foreach ( map { @{$unified_info{depends}->{$_}} }
                      @{$unified_info{mandocs}->{$mansect}} ) {
                $files{$_} = { $mansect => 1, public_manual => 1 };
            }
        }
        $collected_tags{public_manual} = 1;
    }

    if ($tags_to_collect{internal_manual}) {
        print STDERR "DEBUG[files]: collecting internal manuals\n"
            if $debug;

        # We don't have the internal docs in configdata.pm.  However, they
        # are all in the source tree, so they're easy to find.
        foreach my $mansect ( @sections ) {
            foreach ( glob(catfile($config{sourcedir},
                                   'doc', 'internal', $mansect, '*.pod')) ) {
                $files{$_} = { $mansect => 1, internal_manual => 1 };
            }
        }
        $collected_tags{internal_manual} = 1;
    }

    if ($tags_to_collect{public_header}) {
        print STDERR "DEBUG[files]: collecting public headers\n"
            if $debug;

        foreach ( glob(catfile($config{sourcedir},
                               'include', 'openssl', '*.h')) ) {
            $files{$_} = { public_header => 1 };
        }
        # Remember that the headers are collected, as is done for the
        # manuals, so later calls don't glob the directory again.
        $collected_tags{public_header} = 1;
    }

    my @result = @{$collected_results{$tags_as_key} // []};

    if (!@result) {
        # Produce a result based on caller tags
        foreach my $type ( ( 'public_manual', 'internal_manual' ) ) {
            next unless $tags{$type};

            # If caller asked for specific sections, we care about sections.
            # Otherwise, we give back all of them.
            my @selected_sections =
                grep { $tags{$_} } @sections;
            @selected_sections = @sections unless @selected_sections;

            foreach my $section ( ( @selected_sections ) ) {
                push @result,
                    ( sort { basename($a) cmp basename($b) }
                      grep { $files{$_}->{$type} && $files{$_}->{$section} }
                      keys %files );
            }
        }
        if ($tags{public_header}) {
            push @result,
                ( sort { basename($a) cmp basename($b) }
                  grep { $files{$_}->{public_header} }
                  keys %files );
        }

        if ($debug) {
            print STDERR "DEBUG[files]: result:\n";
            print STDERR "DEBUG[files]:     $_\n" foreach @result;
        }
        $collected_results{$tags_as_key} = [ @result ];
    }

    return @result;
}

# Print error message, set $status.
# The arguments are joined with single spaces and written to standard output.
sub err {
    print join(" ", @_), "\n";
    $status = 1;
}

# Cross-check functions in the NAME and SYNOPSIS section.
#   $id        prefix for every message, as built by the caller
#   $filename  path of the POD file being checked
#   $contents  full text of that file
# Problems are reported through err(); nothing useful is returned.
sub name_synopsis {
    my $id = shift;
    my $filename = shift;
    my $contents = shift;

    # Get NAME section and all words in it.
    return unless $contents =~ /=head1 NAME(.*)=head1 SYNOPSIS/ms;
    my $tmp = $1;
    $tmp =~ tr/\n/ /;
    err($id, "Trailing comma before - in NAME")
        if $tmp =~ /, *-/;
    $tmp =~ s/ -.*//g;
    err($id, "POD markup among the names in NAME")
        if $tmp =~ /[<>]/;
    # Squeeze runs of spaces into a single space.  The pattern must require
    # at least one space: one that can match the empty string would insert
    # a space between all characters and make the comma check below fire
    # on every page.
    $tmp =~ s/ +/ /g;
    err($id, "Missing comma in NAME")
        if $tmp =~ /[^,] /;

    my $dirname = dirname($filename);
    my $section = basename($dirname);
    my $simplename = basename($filename, ".pod");
    my $foundfilename = 0;
    my %foundfilenames = ();
    my %names;
    # Base names of all manual pages in this section, looked up once
    # instead of scanning the whole file list for every single name.
    my %pods_in_section =
        map { basename($_) => 1 } files(TAGS => [ 'manual', $section ]);
    foreach my $n ( split ',', $tmp ) {
        $n =~ s/^\s+//;
        $n =~ s/\s+$//;
        err($id, "The name '$n' contains white-space")
            if $n =~ /\s/;
        $names{$n} = 1;
        $foundfilename++ if $n eq $simplename;
        $foundfilenames{$n} = 1
            if ( $pods_in_section{"$n.pod"}
                 && $n ne $simplename );
    }
    err($id, "The following exist as other .pod files:",
         sort keys %foundfilenames)
        if %foundfilenames;
    err($id, "$simplename (filename) missing from NAME section")
        unless $foundfilename;

    # Find all functions in SYNOPSIS
    return unless $contents =~ /=head1 SYNOPSIS(.*)=head1 DESCRIPTION/ms;
    my $syn = $1;
    my $ignore_until = undef;   # If defined, this is a regexp
    # Remove all non-code lines
    $syn =~ s/^(?:\s*?|\S.*?)$//msg;
    # Remove all comments
    $syn =~ s/\/\*.*?\*\///msg;
    while ( $syn ) {
        # "env" lines end at a newline.
        # Preprocessor lines start with a # and end at a newline.
        # Other lines end with a semicolon, and may cover more than
        # one physical line.
        if ( $syn !~ /^ \s*(env .*?|#.*?|.*?;)\s*$/ms ) {
            err($id, "Can't parse rest of synopsis:\n$syn\n(declarations not ending with a semicolon (;)?)");
            last;
        }
        my $line = $1;
        $syn = $';              # continue with what follows this statement

        print STDERR "DEBUG[name_synopsis] \$line = '$line'\n" if $debug;

        # Special code to skip over documented structures
        if ( defined $ignore_until) {
            next if $line !~ /$ignore_until/;
            $ignore_until = undef;
            next;
        }
        if ( $line =~ /^\s*(?:typedef\s+)?struct(?:\s+\S+)\s*\{/ ) {
            $ignore_until = qr/\}.*?;/;
            next;
        }

        my $sym;
        my $is_prototype = 1;
        # Replace the type-building macros with a plain type, so their
        # parentheses are not mistaken for an argument list.
        $line =~ s/LHASH_OF\([^)]+\)/int/g;
        $line =~ s/STACK_OF\([^)]+\)/int/g;
        $line =~ s/SPARSE_ARRAY_OF\([^)]+\)/int/g;
        $line =~ s/__declspec\([^)]+\)//;

        ## We don't prohibit that space, to allow typedefs looking like
        ## this:
        ##
        ## typedef int (fantastically_long_name_breaks_80char_limit)
        ##     (fantastically_long_name_breaks_80char_limit *something);
        ##
        #if ( $line =~ /typedef.*\(\*?\S+\)\s+\(/ ) {
        #    # a callback function with whitespace before the argument list:
        #    # typedef ... (*NAME) (...
        #    # typedef ... (NAME) (...
        #    err($id, "Function typedef has space before arg list: $line");
        #}

        if ( $line =~ /env (\S*)=/ ) {
            # environment variable env NAME=...
            $sym = $1;
        } elsif ( $line =~ /typedef.*\(\*?($C_symbol)\)\s*\(/ ) {
            # a callback function pointer: typedef ... (*NAME)(...
            # a callback function signature: typedef ... (NAME)(...
            $sym = $1;
        } elsif ( $line =~ /typedef.*($C_symbol)\s*\(/ ) {
            # a callback function signature: typedef ... NAME(...
            $sym = $1;
        } elsif ( $line =~ /typedef.*($C_symbol);/ ) {
            # a simple typedef: typedef ... NAME;
            $is_prototype = 0;
            $sym = $1;
        } elsif ( $line =~ /enum ($C_symbol) \{/ ) {
            # an enumeration: enum ... {
            $sym = $1;
        } elsif ( $line =~ /#\s*(?:define|undef) ($C_symbol)/ ) {
            $is_prototype = 0;
            $sym = $1;
        } elsif ( $line =~ /^[^\(]*?\(\*($C_symbol)\s*\(/ ) {
            # a function returning a function pointer: TYPE (*NAME(args))(args)
            $sym = $1;
        } elsif ( $line =~ /^[^\(]*?($C_symbol)\s*\(/ ) {
            # a simple function declaration
            $sym = $1;
        }
        else {
            next;
        }

        print STDERR "DEBUG[name_synopsis] \$sym = '$sym'\n" if $debug;

        err($id, "$sym missing from NAME section")
            unless defined $names{$sym};
        # The value 2 marks a name that has been seen in SYNOPSIS
        $names{$sym} = 2;

        # Do some sanity checks on the prototype.
        err($id, "Prototype missing spaces around commas: $line")
            if $is_prototype && $line =~ /[a-z0-9],[^\s]/;
    }

    # Sorted, so the reports come out in the same order on every run
    foreach my $n ( sort keys %names ) {
        next if $names{$n} == 2;
        err($id, "$n missing from SYNOPSIS");
    }
}

# Check if SECTION ($3) is located before BEFORE ($4)
# Nothing is reported unless both =head1 sections are present.  The section
# names are matched literally, never as regexp syntax.
sub check_section_location {
    my $id = shift;
    my $contents = shift;
    my $section = shift;
    my $before = shift;

    return unless $contents =~ /=head1 \Q$section\E/
        and $contents =~ /=head1 \Q$before\E/;
    err($id, "$section should appear before $before section")
        if $contents =~ /=head1 \Q$before\E.*=head1 \Q$section\E/ms;
}

# Check if a =head1 is duplicated, or a =headX is duplicated within a
# =head1.  Treats =head2 =head3 as equivalent -- it doesn't reset the head3
# sets if it finds a =head2 -- but that is good enough for now. Also check
# for proper capitalization, trailing periods, etc.
sub check_head_style {
    my ($id, $contents) = @_;
    my (%seen_top, %seen_sub);

    HEADING:
    for my $heading ( split /\n+/, $contents ) {
        next HEADING if $heading !~ /^=head/;

        if ( $heading !~ /head1/ ) {
            # Lower level heading: must be unique within the current =head1
            if ( defined $seen_sub{$heading} ) {
                err($id, "Duplicate subsection $heading");
            }
            $seen_sub{$heading} = 1;
        }
        else {
            # Top level heading: must be unique within the page, and it
            # starts a fresh set of lower level headings.
            if ( defined $seen_top{$heading} ) {
                err($id, "Duplicate section $heading");
            }
            $seen_top{$heading} = 1;
            %seen_sub = ();
        }

        # Style checks that apply to the heading text itself
        if ( $heading =~ /\.[^\w]/ || $heading =~ /\.$/ ) {
            err($id, "Period in =head");
        }
        if ( $heading =~ /head1.*[a-z]/ ) {
            err($id, "not all uppercase in =head1");
        }
        if ( $heading =~ /head[234][ A-Z0-9]+$/ ) {
            err($id, "All uppercase in subhead");
        }
    }
}

# Options and symbols may carry extra markup, possibly nested, so we
# need a regexp that picks out one whole markup chunk including
# whatever markup it contains.
# See the description of recursive patterns, (?R) and friends, in perlre(1).
# (note: the recursion alternative has to be tried before the . alternative)
# (note: the repetition has to be non-greedy, otherwise a text such as
# 'B<foo> and B<bar>' would be picked up as one single chunk)
my $markup_re =
    qr/(                        # Capture group
           [BIL]<               # The start of what we recurse on
           (?:(?-1)|.)*?        # recurse the whole regexp (referring to
                                # the last opened capture group, i.e. the
                                # start of this regexp), or pick next
                                # character.  Do NOT be greedy!
           >                    # The end of what we recurse on
       )/x;                     # (the x allows this sort of split up regexp)

# An option name, not counting its leading dash: a letter, optionally
# followed by letters, digits, dashes and underscores, where the last
# character must be a letter or a digit.
# The single characters ? and - and any single digit are accepted as well,
# which covers -?, -- and -n for a digit n.
my $option_re =
    qr/(?:
            \?                  # Single question mark
            |
            \d                  # Single digit
            |
            -                   # Single dash (--)
            |
            [[:alpha:]](?:[-_[:alnum:]]*?[[:alnum:]])?
       )/x;

# Helper function to check if a given $thing is properly marked up
# option.
# It returns one of these values:
#     undef       if it's not an option
#     ""          if it's a malformed option
#     $unwrapped  the option with the outermost B<> wrapping removed.
# $id and $filename are accepted for symmetry with the other checkers;
# only $thing is looked at.
sub normalise_option {
    my ($id, $filename, $thing) = @_;

    # Strip only the outer B<...> wrapper
    (my $unwrapped = $thing) =~ s/^B<//;
    $unwrapped =~ s/>$//;

    # Strip every piece of markup, inner as well as outer
    (my $unmarked = $thing) =~ s/[BIL]<|>//msg;

    # Anything that doesn't start with a dash isn't an option at all
    return undef if $unwrapped !~ /^\s*-/;

    # A well formed option is handed back without its outer wrapper,
    # a malformed one is signalled with the empty string.
    return $unmarked =~ /^-${option_re}$/ ? $unwrapped : "";
}

# Checks of command option (man1) formatting.  The man1 checks are
# restricted to the SYNOPSIS and OPTIONS sections, the rest is too
# free form, we simply cannot be too strict there.
# Check the option markup in the SYNOPSIS and OPTIONS sections of a command
# page, and that the two sections list the same B<-option> entries.
#   $id        prefix for every message, as built by the caller
#   $filename  path of the POD file being checked
#   $contents  full text of that file
# Problems are reported through err().
sub option_check {
    my $id = shift;
    my $filename = shift;
    my $contents = shift;

    # Take the capture from the match itself, in list context, so that a
    # page without a SYNOPSIS section gives an empty text instead of
    # whatever an earlier successful match happened to leave in $1.
    my ($synopsis) = $contents =~ /=head1\s+SYNOPSIS(.*?)=head1/s;
    $synopsis //= '';

    # Some pages have more than one OPTIONS section, let's make sure
    # to get them all
    my $options = '';
    while ( $contents =~ /=head1\s+[A-Z ]*?OPTIONS$(.*?)(?==head1)/msg ) {
        $options .= $1;
    }

    # Look for options with no or incorrect markup
    while ( $synopsis =~
            /(?<![-<[:alnum:]])-(?:$markup_re|.)*(?![->[:alnum:]])/msg ) {
        err($id, "Malformed option [1] in SYNOPSIS: $&");
    }

    # @synopsis keeps the B<-...> entries in the order found, for the
    # report at the end; %in_synopsis answers "was this one listed?".
    my @synopsis;
    my %in_synopsis;
    while ( $synopsis =~ /$markup_re/msg ) {
        my $found = $&;
        if ( $found =~ /^B<-/ ) {
            push @synopsis, $found;
            $in_synopsis{$found} = 1;
        }
        print STDERR "$id:DEBUG[option_check] SYNOPSIS: found $found\n"
            if $debug;
        my $option_uw = normalise_option($id, $filename, $found);
        err($id, "Malformed option [2] in SYNOPSIS: $found")
            if defined $option_uw && $option_uw eq '';
    }

    # In OPTIONS, we look for =item paragraphs.
    # (?=^\s*$) detects an empty line.
    my %in_options;
    while ( $options =~ /=item\s+(.*?)(?=^\s*$)/msg ) {
        my $item = $&;

        while ( $item =~ /(\[\s*)?($markup_re)/msg ) {
            # Save the captures right away, the matches below replace them
            my $bracket = $1 // '';
            my $found = $2;
            print STDERR "$id:DEBUG[option_check] OPTIONS: found $&\n"
                if $debug;
            err($id, "Unexpected bracket in OPTIONS =item: $item")
                if $bracket ne '' && $found =~ /^B<\s*-/;

            my $option_uw = normalise_option($id, $filename, $found);
            err($id, "Malformed option in OPTIONS: $found")
                if defined $option_uw && $option_uw eq '';
            if ($found =~ /^B<-/) {
                $in_options{$found} = 1;
                # The pages excepted here are not required to repeat
                # their options in SYNOPSIS.
                err($id, "OPTIONS entry $found missing from SYNOPSIS")
                    unless $in_synopsis{$found}
                         || $id =~ /(openssl|-options)\.pod:1:$/;
            }
        }
    }
    foreach my $option (@synopsis) {
        err($id, "SYNOPSIS entry $option missing from OPTIONS")
            unless $in_options{$option};
    }
}

# Normal symbol form
my $symbol_re = qr/[[:alpha:]_][_[:alnum:]]*?/;

# Checks of function name (man3) formatting.  The man3 checks are
# easier than the man1 checks, we only check the names followed by (),
# and only the names that have POD markup.
# Report every marked up name followed by () that is not wrapped in B<>
# or that does not look like a plain symbol once the markup is removed.
#   $id        prefix for every message, as built by the caller
#   $filename  path of the POD file being checked (not used here)
#   $contents  full text of that file
sub functionname_check {
    my $id = shift;
    my $filename = shift;
    my $contents = shift;

    while ( $contents =~ /($markup_re)\(\)/msg ) {
        print STDERR "$id:DEBUG[functionname_check] SYNOPSIS: found $&\n"
            if $debug;

        my $symbol = $1;
        my $unmarked = $symbol;
        $unmarked =~ s/[BIL]<|>//msg;

        err($id, "Malformed symbol: $symbol")
            unless $symbol =~ /^B<.*?>$/ && $unmarked =~ /^${symbol_re}$/;
    }

    # We can't do the kind of collecting coolness that option_check()
    # does, because there are too many things that can't be found in
    # name repositories like the NAME sections, such as symbol names
    # with a variable part (typically marked up as B<foo_I<TYPE>_bar>
}

# This is from http://man7.org/linux/man-pages/man7/man-pages.7.html
my %preferred_words = (
    '16bit'         => '16-bit',
    'a.k.a.'        => 'aka',
    'bitmask'       => 'bit mask',
    'builtin'       => 'built-in',
   #'epoch'         => 'Epoch', # handled specially, below
    'fall-back'     => 'fallback',
    'file name'     => 'filename',
    'file system'   => 'filesystem',
    'host name'     => 'hostname',
    'i-node'        => 'inode',
    'lower case'    => 'lowercase',
    'lower-case'    => 'lowercase',
    'manpage'       => 'man page',
    'non-blocking'  => 'nonblocking',
    'non-default'   => 'nondefault',
    'non-empty'     => 'nonempty',
    'non-negative'  => 'nonnegative',
    'non-zero'      => 'nonzero',
    'path name'     => 'pathname',
    'pre-allocated' => 'preallocated',
    'pseudo-terminal' => 'pseudoterminal',
    'real time'     => 'real-time',
    'realtime'      => 'real-time',
    'reserved port' => 'privileged port',
    'runtime'       => 'run time',
    'saved group ID'=> 'saved set-group-ID',
    'saved set-GID' => 'saved set-group-ID',
    'saved set-UID' => 'saved set-user-ID',
    'saved user ID' => 'saved set-user-ID',
    'set-GID'       => 'set-group-ID',
    'set-UID'       => 'set-user-ID',
    'setgid'        => 'set-group-ID',
    'setuid'        => 'set-user-ID',
    'sub-system'    => 'subsystem',
    'super block'   => 'superblock',
    'super-block'   => 'superblock',
    'super user'    => 'superuser',
    'super-user'    => 'superuser',
    'system port'   => 'privileged port',
    'time stamp'    => 'timestamp',
    'time zone'     => 'timezone',
    'upper case'    => 'uppercase',
    'upper-case'    => 'uppercase',
    'useable'       => 'usable',
    'user name'     => 'username',
    'userspace'     => 'user space',
    'zeroes'        => 'zeros'
);

# Search manpage for words that have a different preferred use.
#   $id        prefix for every message; also used to recognise man1 pages
#   $contents  full text of the POD file
sub wording {
    my $id = shift;
    my $contents = shift;

    # Sorted, so the reports come out in the same order on every run
    foreach my $k ( sort keys %preferred_words ) {
        # Sigh, trademark
        next if $k eq 'file system'
            and $contents =~ /Microsoft Encrypted File System/;
        err($id, "Found '$k' should use '$preferred_words{$k}'")
            if $contents =~ /\b\Q$k\E\b/i;
    }
    err($id, "Found 'epoch' should use 'Epoch'")
        if $contents =~ /\bepoch\b/;
    if ( $id =~ m@man1/@ ) {
        # Both checks look at the text between the NAME and SYNOPSIS
        # headings only.
        err($id, "found 'tool' in NAME, should use 'command'")
            if $contents =~ /=head1 NAME.*\btool\b.*=head1 SYNOPSIS/s;
        err($id, "found 'utility' in NAME, should use 'command'")
            if $contents =~ /=head1 NAME.*\butility\b.*=head1 SYNOPSIS/s;

    }
}

# Perform all sorts of nit/error checks on a manpage
# Takes a key/value list; the keys read here are
#   filename  path of the POD file
#   contents  full text of that file
# Problems are reported through err().  The output of podchecker is passed
# on to standard output, except for the lines filtered out below.
sub check {
    my %podinfo = @_;
    my $filename = $podinfo{filename};
    my $dirname = basename(dirname($filename));
    my $contents = $podinfo{contents};

    # Find what section this page is in; presume 3.
    my $mansect = 3;
    $mansect = $1 if $filename =~ /man([1-9])/;

    my $id = "${filename}:1:";
    check_head_style($id, $contents);

    # Check ordering of some sections in man3
    if ( $mansect == 3 ) {
        check_section_location($id, $contents, "RETURN VALUES", "EXAMPLES");
        check_section_location($id, $contents, "SEE ALSO", "HISTORY");
        check_section_location($id, $contents, "EXAMPLES", "SEE ALSO");
    }

    # Make sure every link has a man section number.
    while ( $contents =~ /$markup_re/msg ) {
        my $target = $1;
        next unless $target =~ /^L<(.*)>$/;     # Skip if not L<...>
        $target = $1;                           # Peel away L< and >
        $target =~ s/\/[^\/]*$//;               # Peel away possible anchor
        $target =~ s/.*\|//g;                   # Peel away possible link text
        next if $target eq '';                  # Skip if links within page, or
        next if $target =~ /::/;                #   links to a Perl module, or
        next if $target =~ /^https?:/;          #   is a URL link, or
        next if $target =~ /\([1357]\)$/;       #   it has a section
        err($id, "Missing man section number (likely, $mansect) in L<$target>");
    }
    # Check for proper links to commands.
    while ( $contents =~ /L<([^>]*)\(1\)(?:\/.*)?>/g ) {
        my $target = $1;
        next if $target =~ /openssl-?/;
        next if ( grep { basename($_) eq "$target.pod" }
                  files(TAGS => [ 'manual', 'man1' ]) );
        next if $target =~ /ps|apropos|sha1sum|procmail|perl/;
        err($id, "Bad command link L<$target(1)>") if grep /man1/, @sections;
    }
    # Check for proper in-man-3 API links.
    while ( $contents =~ /L<([^>]*)\(3\)(?:\/.*)?>/g ) {
        my $target = $1;
        err($id, "Bad L<$target>")
            unless $target =~ /^[_[:alpha:]][_[:alnum:]]*$/;
    }

    # Pages marked as generic are exempt from the per-section checks
    unless ( $contents =~ /^=for openssl generic/ms ) {
        if ( $mansect == 3 ) {
            name_synopsis($id, $filename, $contents);
            functionname_check($id, $filename, $contents);
        } elsif ( $mansect == 1 ) {
            option_check($id, $filename, $contents);
        }
    }

    wording($id, $contents);

    err($id, "Doesn't start with =pod")
        if $contents !~ /^=pod/;
    err($id, "Doesn't end with =cut")
        if $contents !~ /=cut\n$/;
    err($id, "More than one cut line.")
        if $contents =~ /=cut.*=cut/ms;
    err($id, "EXAMPLE not EXAMPLES section.")
        if $contents =~ /=head1 EXAMPLE[^S]/;
    err($id, "WARNING not WARNINGS section.")
        if $contents =~ /=head1 WARNING[^S]/;
    err($id, "Missing copyright")
        if $contents !~ /Copyright .* The OpenSSL Project Authors/;
    err($id, "Copyright not last")
        if $contents =~ /head1 COPYRIGHT.*=head/ms;
    err($id, "head2 in All uppercase")
        if $contents =~ /head2\s+[A-Z ]+\n/;
    err($id, "Extra space after head")
        if $contents =~ /=head\d\s\s+/;
    err($id, "Period in NAME section")
        if $contents =~ /=head1 NAME.*\.\n.*=head1 SYNOPSIS/ms;
    err($id, "Duplicate $1 in L<>")
        if $contents =~ /L<([^>]*)\|([^>]*)>/ && $1 eq $2;
    err($id, "Bad =over $1")
        if $contents =~ /=over([^ ][^24])/;
    err($id, "Possible version style issue")
        if $contents =~ /OpenSSL version [019]/;

    if ( $contents !~ /=for openssl multiple includes/ ) {
        # Look for multiple consecutive openssl #include lines
        # (non-consecutive lines are okay; see man3/MD5.pod).
        if ( $contents =~ /=head1 SYNOPSIS(.*)=head1 DESCRIPTION/ms ) {
            my $count = 0;
            foreach my $line ( split /\n+/, $1 ) {
                if ( $line =~ m@include <openssl/@ ) {
                    err($id, "Has multiple includes")
                        if ++$count == 2;
                } else {
                    $count = 0;
                }
            }
        }
    }

    # Let podchecker write its findings to the scratch file, then pass them
    # on, leaving out the complaint filtered below.
    open my $OUT, '>', $temp
        or die "Can't open $temp, $!";
    err($id, "POD errors")
        if podchecker($filename, $OUT) != 0;
    close $OUT;
    open $OUT, '<', $temp
        or die "Can't read $temp, $!";
    while ( <$OUT> ) {
        next if /\(section\) in.*deprecated/;
        print;
    }
    close $OUT;
    # "or", not "||": the latter would bind to $temp, and the warning could
    # then never be given.
    unlink $temp or warn "Can't remove $temp, $!";

    # Find what section this page is in; presume 3.
    my $section = 3;
    $section = $1 if $dirname =~ /man([1-9])/;

    foreach ( (@{$mandatory_sections{'*'}}, @{$mandatory_sections{$section}}) ) {
        err($id, "Missing $_ head1 section")
            if $contents !~ /^=head1\s+${_}\s*$/m;
    }
}

# Information database ###############################################

# Map of links in each POD file; filename => [ "foo(1)", "bar(3)", ... ]
my %link_map = ();
# Map of names in each POD file or from "missing" files; possible values are:
# If found in a POD files, "name(s)" => filename
# If found in a "missing" file or external, "name(s)" => ''
my %name_map = ();

# State of man-page names.
# %state is affected by loading util/*.num and util/*.syms
# Values may be one of:
# 'crypto'      : belongs in libcrypto (loaded from libcrypto.num)
# 'ssl'         : belongs in libssl (loaded from libssl.num)
# 'other'       : belongs in libcrypto or libssl (loaded from other.syms)
# 'internal'    : Internal
# 'public'      : Public (generic name or external documentation)
# Any of these values except 'public' may be prefixed with 'missing_'
# to indicate that they are known to be missing.
my %state;
# %missing is affected by loading util/missing*.txt.
# Values may be one of:
# 'crypto'      : belongs in libcrypto (loaded from libcrypto.num)
# 'ssl'         : belongs in libssl (loaded from libssl.num)
# 'other'       : belongs in libcrypto or libssl (loaded from other.syms)
# 'internal'    : Internal
my %missing;

# Parse libcrypto.num, etc., and record each symbol found there in %state
# as a section 3 name.
#   $file  path of the file, relative to the source directory
#   $type  state to record for its symbols; 'internal' when not given
# Dies if the file cannot be opened or a line has an unexpected number of
# fields.
sub loadnum ($;$) {
    my $file = shift;
    my $type = shift;

    open my $IN, '<', catfile($config{sourcedir}, $file)
        or die "Can't open $file, $!, stopped";

    while ( <$IN> ) {
        next if /^#/;
        next if /\bNOEXIST\b/;
        my @fields = split();
        die "Malformed line $. in $file: $_"
            if scalar @fields != 2 && scalar @fields != 4;
        $state{$fields[0].'(3)'} = $type // 'internal';
    }
    close $IN;
}

# Load file of symbol names that we know aren't documented.
#   $missingfile  path of the file, relative to the source directory
#   $type         value to record in %missing; 'internal' when not given
# Every line that doesn't start with # is taken as one name.
sub loadmissing($;$)
{
    my $missingfile = shift;
    my $type = shift;

    open my $fh, '<', catfile($config{sourcedir}, $missingfile)
        or die "Can't open $missingfile, $!";
    while ( <$fh> ) {
        chomp;
        next if /^#/;
        $missing{$_} = $type // 'internal';
    }
    close $fh;
}

# Check that we have consistent public / internal documentation and declaration
sub checkstate () {
    # Collect all known names, no matter where they come from
    my %names = map { $_ => 1 } (keys %name_map, keys %state, keys %missing);

    # Check section 3, i.e. functions and macros
    foreach my $name ( grep { $_ =~ /\(3\)$/ } sort keys %names ) {
        # Names without a POD file of their own, and ignored names, have
        # nothing to be compared against.
        next if ( $name_map{$name} // '') eq '' || $name =~ /$ignored/;

        # If a man-page isn't recorded public or if it's recorded missing
        # and internal, it's declared to be internal.
        my $declared_internal =
            ($state{$name} // 'internal') eq 'internal'
            || ($missing{$name} // '') eq 'internal';
        # If a man-page isn't recorded internal or if it's recorded missing
        # and not internal, it's declared to be public
        my $declared_public =
            ($state{$name} // 'internal') ne 'internal'
            || ($missing{$name} // 'internal') ne 'internal';

        err("$name is supposedly public but is documented as internal")
            if ( $declared_public && $name_map{$name} =~ /\/internal\// );
        err("$name is supposedly internal (maybe missing from other.syms) but is documented as public")
            if ( $declared_internal && $name_map{$name} !~ /\/internal\// );
    }
}

# Check for undocumented macros; ignore those in the "missing" file
# and do simple check for #define in our header files.
# Only function-like macros, i.e. a #define of a name directly followed
# by an opening parenthesis, are looked at.
sub checkmacros {
    my $count = 0;
    my %seen;

    foreach my $f ( files(TAGS => 'public_header') ) {
        # Skip some internals we don't want to document yet.
        my $header = basename($f);
        next if $header eq 'asn1.h';
        next if $header eq 'asn1t.h';
        next if $header eq 'err.h';
        open my $in, '<', $f
            or die "Can't open $f, $!";
        while ( <$in> ) {
            next unless /^#\s*define\s*(\S+)\(/;
            my $macro = "$1(3)"; # We know they're all in section 3
            next if defined $name_map{$macro}
                || defined $missing{$macro}
                || defined $seen{$macro}
                || $macro =~ /$ignored/;

            err("$f:", "macro $macro undocumented")
                if $opt_d || $opt_e;
            $count++;
            $seen{$macro} = 1;
        }
        close $in;
    }
    err("# $count macros undocumented (count is approximate)")
        if $count > 0;
}

# Find out what is undocumented (filtering out the known missing ones)
# and display them.
sub printem ($) {
    # $type is the library tag to report on; it is compared against the
    # values stored in %state and used in the "lib$type" summary line.
    my $type = shift;
    my $count = 0;

    foreach my $func ( grep { $state{$_} eq $type } sort keys %state ) {
        # Documented, or listed as known-missing: nothing to report.
        next if defined $name_map{$func}
            || defined $missing{$func};

        err("$type:", "function $func undocumented")
            if $opt_d || $opt_e;
        $count++;
    }
    err("# $count lib$type names are not documented")
        if $count > 0;
}

# Collect all the names in a manpage.
#
# Takes the pod info as a flat hash; the keys read here are 'filename',
# 'contents' and 'names'.  Every name is recorded in %name_map as
# "name(section)" => filename, and all L<name(N)> links found in the
# contents are stored in %link_map under the file name.
sub collectnames {
    my %podinfo = @_;
    my $filename = $podinfo{filename};
    my $simplename = basename($filename, ".pod");
    my $id = "${filename}:1:";
    my $is_generic = $podinfo{contents} =~ /^=for openssl generic/ms;

    # Capture in list context so that a failed match gives undef instead of
    # silently reusing $1 from some earlier, unrelated match.
    my ($section) = $filename =~ m|man(\d)/|;
    unless ( defined $section ) {
        err($id, "cannot determine manual section from file name");
        return;
    }

    unless ( grep { $simplename eq $_ } @{$podinfo{names}} ) {
        err($id, "$simplename not in NAME section");
        push @{$podinfo{names}}, $simplename;
    }
    foreach my $name ( @{$podinfo{names}} ) {
        next if $name eq "";
        err($id, "'$name' contains whitespace")
            if $name =~ /\s/;
        my $name_sec = "$name($section)";
        if ( !defined $name_map{$name_sec} ) {
            $name_map{$name_sec} = $filename;
            # Generic pages declare the state of their names themselves,
            # based on where the file lives.
            $state{$name_sec} //=
                ( $filename =~ /\/internal\// ? 'internal' : 'public' )
                if $is_generic;
        } elsif ( $filename eq $name_map{$name_sec} ) {
            err($id, "$name_sec duplicated in NAME section of",
                $name_map{$name_sec});
        } elsif ( $name_map{$name_sec} ne '' ) {
            err($id, "$name_sec also in NAME section of",
                $name_map{$name_sec});
        }
    }

    if ( $podinfo{contents} =~ /=for openssl foreign manual (.*)\n/ ) {
        foreach my $f ( split / /, $1 ) {
            $name_map{$f} = ''; # It still exists!
            $state{$f} = 'public'; # We assume!
        }
    }

    my @links = ();
    # Don't use this regexp directly on $podinfo{contents}, as it causes
    # a regexp recursion, which fails on really big PODs.  Instead, use
    # $markup_re to pick up general markup, and use this regexp to check
    # that the markup that was found is indeed a link.
    my $linkre = qr/L<
                    # if the link is of the form L<something|name(s)>,
                    # then remove 'something'.  Note that 'something'
                    # may contain POD codes as well...
                    (?:(?:[^\|]|<[^>]*>)*\|)?
                    # we're only interested in references that have
                    # a one digit section number
                    ([^\/>\(]+\(\d\))
                   /x;
    while ( $podinfo{contents} =~ /$markup_re/msg ) {
        my $x = $1;

        if ($x =~ $linkre) {
            push @links, $1;
        }
    }
    $link_map{$filename} = [ @links ];
}

# Look for L<> ("link") references that point to files that do not exist.
# Also reports links to names recorded as internal (in %state or %missing)
# when the referring file is not itself under an "/internal/" path.
sub checklinks {
    foreach my $filename ( sort keys %link_map ) {
        foreach my $link ( @{$link_map{$filename}} ) {
            err("${filename}:1:", "reference to non-existing $link")
                unless defined $name_map{$link} || defined $missing{$link};
            err("${filename}:1:", "reference of internal $link in public documentation $filename")
                if ( ( ($state{$link} // '') eq 'internal'
                       || ($missing{$link} // '') eq 'internal' )
                     && $filename !~ /\/internal\// );
        }
    }
}

# Cipher/digests to skip if they show up as "not implemented"
# because they are, via the "-*" construct.
my %skips = (
    'aes128' => 1,
    'aes192' => 1,
    'aes256' => 1,
    'aria128' => 1,
    'aria192' => 1,
    'aria256' => 1,
    'camellia128' => 1,
    'camellia192' => 1,
    'camellia256' => 1,
    'des' => 1,
    'des3' => 1,
    'idea' => 1,
    'cipher' => 1,
    'digest' => 1,
);

my %genopts; # generic options parsed from apps/include/opt.h

# Check the flags of a command and see if everything is in the manpage
#
# $cmd is the command name (its source is read from apps/$cmd.c) and $doc
# is the path of its manual page.  Options found in the source but not in
# the manual are reported as undocumented; options found in the manual but
# not in the source are reported as not implemented.
sub checkflags {
    my $cmd = shift;
    my $doc = shift;
    my @cmdopts;
    my %docopts;

    # Get the list of options in the command source file.  Scanning starts
    # at the line beginning with "const OPTIONS" and stops at the closing
    # "};" of that table.
    my $active = 0;
    my $expect_helpstr = "";
    open my $src_fh, '<', "apps/$cmd.c"
        or die "Can't open apps/$cmd.c to list options for $cmd, $!";
    while ( <$src_fh> ) {
        chomp;
        if ($active) {
            last if m/^\s*};/;
            if ($expect_helpstr ne "") {
                # The previous entry had no help string on its own line, so
                # this line (preprocessor conditionals aside) must start
                # with one.
                next if m/^\s*#\s*if/;
                err("$cmd does not implement help for -$expect_helpstr") unless m/^\s*"/;
                $expect_helpstr = "";
            }
            if (m/\{\s*"([^"]+)"\s*,\s*OPT_[A-Z0-9_]+\s*,\s*('[-\/:<>cEfFlMnNpsuU]'|0)(.*)$/
                    && !($cmd eq "s_client" && $1 eq "wdebug")) {
                push @cmdopts, $1;
                $expect_helpstr = $1;
                $expect_helpstr = "" if $3 =~ m/^\s*,\s*"/;
            } elsif (m/[\s,](OPT_[A-Z]+_OPTIONS?)\s*(,|$)/) {
                # A generic option group; expand it from %genopts.
                push @cmdopts, @{ $genopts{$1} };
            }
        } elsif (m/^const\s+OPTIONS\s*/) {
            $active = 1;
        }
    }
    close $src_fh;

    # Get the list of flags from the synopsis
    open my $doc_fh, '<', $doc
        or die "Can't open $doc, $!";
    while ( <$doc_fh> ) {
        chomp;
        last if /DESCRIPTION/;
        next unless /\[B<-([^ >]+)/ || /^B<-([^ >]+)/;
        my $opt = $1;
        # Strip a leading I<...> markup from the option name.
        $opt = $1 if $opt =~ /I<(.*)/;
        $docopts{$opt} = 1;
    }
    close $doc_fh;

    # See what's in the command not the manpage.
    my @undocced = sort grep { !defined $docopts{$_} } @cmdopts;
    foreach ( @undocced ) {
        err("$doc: undocumented $cmd option -$_");
    }

    # See what's in the manpage not the command.
    my %implemented = map { $_ => 1 } @cmdopts;
    my @unimpl = sort grep { !$implemented{$_} } keys %docopts;
    foreach ( @unimpl ) {
        next if $_ eq "-";      # Skip the -- end-of-flags marker
        next if defined $skips{$_};
        err("$doc: $cmd does not implement -$_");
    }
}

##
## MAIN()
## Do the work requested by the various getopt flags.
## The flags are parsed in alphabetical order, just because we have
## to have *some way* of listing them.
##

if ( $opt_c ) {
    my @commands = ();

    # Get the lists of generic options.  Each "#define OPT_xxx_OPTIONS \"
    # line starts a group, the option entries that follow are added to it,
    # and an empty line ends it.
    my $active = "";
    open my $opt_fh, '<',
        catfile($config{sourcedir}, 'apps', 'include', 'opt.h')
        or die "Can't open apps/include/opt.h to list generic options, $!";
    while ( <$opt_fh> ) {
        chomp;
        push @{ $genopts{$active} }, $1
            if $active ne "" && m/^\s+\{\s*"([^"]+)"\s*,\s*OPT_/;
        $active = $1 if m/^\s*#\s*define\s+(OPT_[A-Z]+_OPTIONS?)\s*\\\s*$/;
        $active = "" if m/^\s*$/;
    }
    close $opt_fh;

    # Get list of commands.  Fail loudly if the directory can't be read;
    # otherwise no command would be checked and nothing would be reported.
    # Sorted so that the output order doesn't depend on readdir().
    opendir(my $apps_dh, "apps")
        or die "Can't open directory apps to list commands, $!";
    @commands = sort grep { /\.c$/ } readdir($apps_dh);
    closedir($apps_dh);

    # See if each has a manpage.
    foreach my $cmd ( @commands ) {
        $cmd =~ s/\.c$//;
        next if $cmd eq 'progs' || $cmd eq 'vms_decc_init';
        my @doc = ( grep { basename($_) eq "openssl-$cmd.pod"
                           # For "tsget" and "CA.pl" pod pages
                           || basename($_) eq "$cmd.pod" }
                    files(TAGS => [ 'manual', 'man1' ]) );
        my $num = scalar @doc;
        if ($num > 1) {
            err("$num manuals for 'openssl $cmd': ".join(", ", @doc));
        } elsif ($num < 1) {
            err("no manual for 'openssl $cmd'");
        } else {
            checkflags($cmd, $doc[0]);
        }
    }
}

# Populate %state
loadnum('util/libcrypto.num', 'crypto');
loadnum('util/libssl.num', 'ssl');
loadnum('util/other.syms', 'other');
loadnum('util/other-internal.syms');
if ( $opt_o ) {
    loadmissing('util/missingmacro111.txt', 'crypto');
    loadmissing('util/missingcrypto111.txt', 'crypto');
    loadmissing('util/missingssl111.txt', 'ssl');
} elsif ( !$opt_u ) {
    loadmissing('util/missingmacro.txt', 'crypto');
    loadmissing('util/missingcrypto.txt', 'crypto');
    loadmissing('util/missingssl.txt', 'ssl');
    loadmissing('util/missingcrypto-internal.txt');
    loadmissing('util/missingssl-internal.txt');
}

if ( $opt_n || $opt_l || $opt_u || $opt_v ) {
    # Files given on the command line are only honoured together with -n.
    my @files_to_read = ( $opt_n && @ARGV ) ? @ARGV : files(TAGS => 'manual');

    foreach (@files_to_read) {
        my %podinfo = extract_pod_info($_, { debug => $debug });

        collectnames(%podinfo)
            if ( $opt_l || $opt_u || $opt_v );

        check(%podinfo)
            if ( $opt_n );
    }
}

if ( $opt_l ) {
    checklinks();
}

if ( $opt_n ) {
    # If not given args, check that all man1 commands are named properly.
    if ( scalar @ARGV == 0 && grep /man1/, @sections ) {
        foreach ( files(TAGS => [ 'public_manual', 'man1' ]) ) {
            next if /openssl\.pod/
                || /CA\.pl/ || /tsget\.pod/; # these commands are special cases
            err("$_ doesn't start with openssl-") unless /openssl-/;
        }
    }
}

checkstate();

if ( $opt_u || $opt_v) {
    printem('crypto');
    printem('ssl');
    checkmacros();
}

exit $status;