OpenSSL/Util/Pod.pm

*e0c4386eSCy Schubert# Copyright 2016-2021 The OpenSSL Project Authors. All Rights Reserved.
*e0c4386eSCy Schubert#
*e0c4386eSCy Schubert# Licensed under the Apache License 2.0 (the "License").  You may not use
*e0c4386eSCy Schubert# this file except in compliance with the License.  You can obtain a copy
*e0c4386eSCy Schubert# in the file LICENSE in the source distribution or at
*e0c4386eSCy Schubert# https://www.openssl.org/source/license.html
*e0c4386eSCy Schubert
*e0c4386eSCy Schubertpackage OpenSSL::Util::Pod;
*e0c4386eSCy Schubert
*e0c4386eSCy Schubertuse strict;
*e0c4386eSCy Schubertuse warnings;
*e0c4386eSCy Schubert
*e0c4386eSCy Schubertuse Exporter;
# Package metadata and Exporter configuration.  These are package globals
# because Exporter and UNIVERSAL::VERSION look them up by name; they are
# declared with 'our' rather than the superseded 'vars' pragma.
our $VERSION = "0.1";
our @ISA = qw(Exporter);
our @EXPORT = qw(extract_pod_info);    # exported by default
our @EXPORT_OK = ();                   # nothing exported on request only
our %EXPORT_TAGS = ();                 # no export tag groups
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=head1 NAME
*e0c4386eSCy Schubert
*e0c4386eSCy SchubertOpenSSL::Util::Pod - utilities to manipulate .pod files
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=head1 SYNOPSIS
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert  use OpenSSL::Util::Pod;
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert  my %podinfo = extract_pod_info("foo.pod");
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert  # or if the file is already opened...  Note that this consumes the
*e0c4386eSCy Schubert  # remainder of the file.
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert  my %podinfo = extract_pod_info(\*STDIN);
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=head1 DESCRIPTION
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=over
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=item B<extract_pod_info "FILENAME", HASHREF>
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=item B<extract_pod_info "FILENAME">
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=item B<extract_pod_info GLOB, HASHREF>
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=item B<extract_pod_info GLOB>
*e0c4386eSCy Schubert
*e0c4386eSCy SchubertExtracts information from a .pod file, given a STRING (file name) or a
*e0c4386eSCy SchubertGLOB (a file handle).  The result is given back as a hash table.
*e0c4386eSCy Schubert
*e0c4386eSCy SchubertThe additional hash is for extra parameters:
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=over
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=item B<section =E<gt> N>
*e0c4386eSCy Schubert
*e0c4386eSCy SchubertThe value MUST be a number, and will be the man section number
*e0c4386eSCy Schubertto be used with the given .pod file.
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=item B<debug =E<gt> 0|1>
*e0c4386eSCy Schubert
If set to 1, extra debug text will be printed on STDERR.
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=back
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=back
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=head1 RETURN VALUES
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=over
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=item B<extract_pod_info> returns a hash table with the following
*e0c4386eSCy Schubertitems:
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=over
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=item B<section =E<gt> N>
*e0c4386eSCy Schubert
*e0c4386eSCy SchubertThe man section number this .pod file belongs to.  Often the same as
*e0c4386eSCy Schubertwas given as input.
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=item B<names =E<gt> [ "name", ... ]>
*e0c4386eSCy Schubert
*e0c4386eSCy SchubertAll the names extracted from the NAME section.
*e0c4386eSCy Schubert
=item B<contents =E<gt> "...">

The whole contents of the .pod file.

=item B<filename =E<gt> "...">

The name of the file that was read, or undef if a file handle was given
as input.
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=back
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=back
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert=cut
*e0c4386eSCy Schubert
# extract_pod_info INPUT [, HASHREF]
#
# Reads a whole .pod document and collects the names found in its NAME
# section, plus any names given in "=for openssl names:" paragraphs that
# appear before the NAME section ends.
#
# INPUT is either a file name (plain string) or an already opened file
# handle (GLOB reference); a handle is read to its end.  HASHREF may carry
# the options "section" (man section number, default 0) and "debug" (print
# trace text on STDERR when true, default 0).
#
# Returns a hash (as a list) with the keys:
#   section  - the section number taken from the options
#   names    - array ref of the NAME section names followed by the
#              "=for openssl names:" names
#   contents - the complete text that was read
#   filename - the file name given as INPUT, undef for a file handle
#
# Dies if INPUT is undefined or is a reference other than a GLOB, or if
# the named file cannot be opened.
sub extract_pod_info {
    my $input = shift;
    my $defaults_ref = shift || {};
    my %defaults = ( debug => 0, section => 0, %$defaults_ref );
    my $fh = undef;
    my $filename = undef;
    my $contents;

    # An undefined path must be rejected here: three-argument open with an
    # undefined file name would open an anonymous temporary file.
    die "Unknown input type" unless defined $input;

    # If not a file handle, then it's assumed to be a file path (a string)
    if (ref($input) eq "") {
        $filename = $input;
        # Three-argument open with an explicit read mode, so that no part
        # of the file name is ever interpreted as a mode or a pipe and
        # surrounding whitespace in the name is preserved.
        open $fh, '<', $filename or die "Trying to read $filename: $!\n";
        print STDERR "DEBUG: Reading $filename\n" if $defaults{debug};
        $input = $fh;
    }
    if (ref($input) eq "GLOB") {
        local $/ = undef;       # slurp mode: read everything in one go
        $contents = <$input>;
    } else {
        die "Unknown input type";
    }
    # Slurping a handle that is already at end of file can yield undef
    $contents = "" unless defined $contents;

    my @invisible_names = ();
    my %podinfo = ( section => $defaults{section});
    $podinfo{lastsecttext} = ""; # init needed in case input file is empty

    # Regexp to split a text into paragraphs found at
    # https://www.perlmonks.org/?node_id=584367
    # Most of all, \G (continue at last match end) and /g (anchor
    # this match for \G) are significant
    my @paragraphs = $contents =~ /\G((?:(?!\n\n).)*\n+|.+\z)/sg;

    foreach my $para (@paragraphs) {
        # Remove as many line endings as possible from the end of the paragraph
        while ($para =~ s|\R$||) {}

        print STDERR "DEBUG: Paragraph:\n$para\n"
            if $defaults{debug};

        # Stop reading when we have reached past the NAME section.
        last if ($para =~ m|^=head1|
                 && defined $podinfo{lastsect}
                 && $podinfo{lastsect} eq "NAME");

        # Collect the section name
        if ($para =~ m|^=head1\s*(.*)|) {
            # Copy the capture before trimming it; a successful
            # substitution would otherwise reset $1.
            my $section_name = $1;
            $section_name =~ s/\s+$//;
            $podinfo{lastsect} = $section_name;
            print STDERR "DEBUG: Found new pod section $section_name\n"
                if $defaults{debug};
            print STDERR "DEBUG: Clearing pod section text\n"
                if $defaults{debug};
            $podinfo{lastsecttext} = "";
        }

        # Add invisible names
        if ($para =~ m|^=for\s+openssl\s+names:\s*(.*)|s) {
            my @tmp = split(/,/, $1);
            s/\s+//g foreach @tmp;      # names carry no whitespace at all
            print STDERR
                "DEBUG: Found invisible names: ", join(', ', @tmp), "\n"
                if $defaults{debug};
            push @invisible_names, @tmp;
        }

        # POD commands and blank paragraphs carry no section text
        next if ($para =~ m|^=| || $para =~ m|^\s*$|);

        # Collect the section text
        print STDERR "DEBUG: accumulating pod section text \"$para\"\n"
            if $defaults{debug};
        # Compare against the empty string rather than testing for truth,
        # so that a text of "0" still gets its separating blank.
        $podinfo{lastsecttext} .= " " if $podinfo{lastsecttext} ne "";
        $podinfo{lastsecttext} .= $para;
    }


    if (defined $fh) {
        close $fh;
        print STDERR "DEBUG: Done reading $filename\n" if $defaults{debug};
    }

    # Drop everything from the first " - " on, keeping only what precedes it
    $podinfo{lastsecttext} =~ s|\s+-\s+.*$||s;

    my @names =
        map { s/^\s+//g;        # Trim prefix blanks
              s/\s+$//g;        # Trim suffix blanks
              s|/|-|g;          # Treat slash as dash
              $_ }
        split(m|,|, $podinfo{lastsecttext});

    print STDERR
        "DEBUG: Collected names are: ",
        join(', ', @names, @invisible_names), "\n"
        if $defaults{debug};

    return ( section => $podinfo{section},
             names => [ @names, @invisible_names ],
             contents => $contents,
             filename => $filename );
}
*e0c4386eSCy Schubert
*e0c4386eSCy Schubert1;