=head1 NAME Mdoc - perl module to parse Mdoc macros =head1 SYNOPSIS use Mdoc qw(ns pp soff son stoggle mapwords); See mdoc2man and mdoc2texi for code examples. =head1 FUNCTIONS =over 4 =item def_macro( NAME, CODE, [ raw => 1, greedy => 1, concat_until => '.Xx' ] ) Define new macro. The CODE reference will be called by call_macro(). You can have two distinct definitions for and inline macro and for a standalone macro (i. e. 'Pa' and '.Pa'). The CODE reference is passed a list of arguments and is expected to return list of strings and control characters (see C<CONSTANTS>). By default the surrouding "" from arguments to macros are removed, use C<raw> to disable this. Normaly CODE reference is passed all arguments up to next nested macro. Set C<greedy> to to pass everything up to the end of the line. If the concat_until is present, the line is concated until the .Xx macro is found. For example the following macro definition def_macro('.Oo', gen_encloser(qw([ ]), concat_until => '.Oc' } def_macro('.Cm', sub { mapwords {'($_)'} @_ } } and the following input .Oo .Cm foo | .Cm bar | .Oc results in [(foo) | (bar)] =item get_macro( NAME ) Returns a hash reference like: { run => CODE, raw => [1|0], greedy => [1|0] } Where C<CODE> is the CODE reference used to define macro called C<NAME> =item parse_line( INPUT, OUTPUT_CODE, PREPROCESS_CODE ) Parse a line from the C<INPUT> filehandle. If a macro was detected it returns a list (MACRO_NAME, @MACRO_ARGS), otherwise it calls the C<OUTPUT_CODE>, giving caller a chance to modify line before printing it. If C<PREPROCESS_CODE> is defined it calls it prior to passing argument to a macro, giving caller a chance to alter them. if EOF was reached undef is returned. =item call_macro( MACRO, ARGS, ... ) Call macro C<MACRO> with C<ARGS>. The CODE reference for macro C<MACRO> is called and for all the nested macros. Every called macro returns a list which is appended to return value and returned when all nested macros are processed. Use to_string() to produce a printable string from the list. =item to_string ( LIST ) Processes C<LIST> returned from call_macro() and returns formatted string. =item mapwords BLOCK ARRAY This is like perl's map only it calls BLOCK only on elements which are not punctuation or control characters. =item space ( ['on'|'off] ) Turn spacing on or off. If called without argument it returns the current state. =item gen_encloser ( START, END ) Helper function for generating macros that enclose their arguments. gen_encloser(qw({ })); returns sub { '{', ns, @_, ns, pp('}')} =item set_Bl_callback( CODE , DEFS ) This module implements the Bl/El macros for you. Using set_Bl_callback you can provide a macro definition that should be executed on a .Bl call. =item set_El_callback( CODE , DEFS ) This module implements the Bl/El macros for you. Using set_El_callback you can provide a macro definition that should be executed on a .El call. =item set_Re_callback( CODE ) The C<CODE> is called after a Rs/Re block is done. With a hash reference as a parameter, describing the reference. =back =head1 CONSTANTS =over 4 =item ns Indicate 'no space' between to members of the list. =item pp ( STRING ) The string is 'punctuation point'. It means that every punctuation preceeding that element is put behind it. =item soff Turn spacing off. =item son Turn spacing on. =item stoggle Toogle spacing. =item hs Print space no matter spacing mode. =back =head1 TODO * The concat_until only works with standalone macros. This means that .Po blah Pc will hang until .Pc in encountered. * Provide default macros for Bd/Ed * The reference implementation is uncomplete =cut package Mdoc; use strict; use warnings; use List::Util qw(reduce); use Text::ParseWords qw(quotewords); use Carp; use Exporter qw(import); our @EXPORT_OK = qw(ns pp soff son stoggle hs mapwords gen_encloser nl); use constant { ns => ['nospace'], soff => ['spaceoff'], son => ['spaceon'], stoggle => ['spacetoggle'], hs => ['hardspace'], }; sub pp { my $c = shift; return ['pp', $c ]; } sub gen_encloser { my ($o, $c) = @_; return sub { ($o, ns, @_, ns, pp($c)) }; } sub mapwords(&@) { my ($f, @l) = @_; my @res; for my $el (@l) { local $_ = $el; push @res, $el =~ /^(?:[,\.\{\}\(\):;\[\]\|])$/ || ref $el eq 'ARRAY' ? $el : $f->(); } return @res; } my %macros; ############################################################################### # Default macro definitions start ############################################################################### def_macro('Xo', sub { @_ }, concat_until => '.Xc'); def_macro('.Ns', sub {ns, @_}); def_macro('Ns', sub {ns, @_}); { my %reference; def_macro('.Rs', sub { () } ); def_macro('.%A', sub { if ($reference{authors}) { $reference{authors} .= " and @_" } else { $reference{authors} = "@_"; } return (); }); def_macro('.%T', sub { $reference{title} = "@_"; () } ); def_macro('.%O', sub { $reference{optional} = "@_"; () } ); sub set_Re_callback { my ($sub) = @_; croak 'Not a CODE reference' if not ref $sub eq 'CODE'; def_macro('.Re', sub { my @ret = $sub->(\%reference); %reference = (); @ret }); return; } } def_macro('.Bl', sub { die '.Bl - no list callback set' }); def_macro('.It', sub { die ".It called outside of list context - maybe near line $." }); def_macro('.El', sub { die '.El requires .Bl first' }); { my $elcb = sub { () }; sub set_El_callback { my ($sub) = @_; croak 'Not a CODE reference' if ref $sub ne 'CODE'; $elcb = $sub; return; } sub set_Bl_callback { my ($blcb, %defs) = @_; croak 'Not a CODE reference' if ref $blcb ne 'CODE'; def_macro('.Bl', sub { my $orig_it = get_macro('.It'); my $orig_el = get_macro('.El'); my $orig_bl = get_macro('.Bl'); my $orig_elcb = $elcb; # Restore previous .It and .El on each .El def_macro('.El', sub { def_macro('.El', delete $orig_el->{run}, %$orig_el); def_macro('.It', delete $orig_it->{run}, %$orig_it); def_macro('.Bl', delete $orig_bl->{run}, %$orig_bl); my @ret = $elcb->(@_); $elcb = $orig_elcb; @ret }); $blcb->(@_) }, %defs); return; } } def_macro('.Sm', sub { my ($arg) = @_; if (defined $arg) { space($arg); } else { space() eq 'off' ? space('on') : space('off'); } () } ); def_macro('Sm', do { my $off; sub { my ($arg) = @_; if (defined $arg && $arg =~ /^(on|off)$/) { shift; if ($arg eq 'off') { soff, @_; } elsif ($arg eq 'on') { son, @_; } } else { stoggle, @_; } }} ); ############################################################################### # Default macro definitions end ############################################################################### sub def_macro { croak 'Odd number of elements for hash argument <'.(scalar @_).'>' if @_%2; my ($macro, $sub, %def) = @_; croak 'Not a CODE reference' if ref $sub ne 'CODE'; $macros{ $macro } = { run => $sub, greedy => delete $def{greedy} || 0, raw => delete $def{raw} || 0, concat_until => delete $def{concat_until}, }; if ($macros{ $macro }{concat_until}) { $macros{ $macros{ $macro }{concat_until} } = { run => sub { @_ } }; $macros{ $macro }{greedy} = 1; } return; } sub get_macro { my ($macro) = @_; croak "Macro <$macro> not defined" if not exists $macros{ $macro }; +{ %{ $macros{ $macro } } } } #TODO: document this sub parse_opts { my %args; my $last; for (@_) { if ($_ =~ /^\\?-/) { s/^\\?-//; $args{$_} = 1; $last = _unquote($_); } else { $args{$last} = _unquote($_) if $last; undef $last; } } return %args; } sub _is_control { my ($el, $expected) = @_; if (defined $expected) { ref $el eq 'ARRAY' and $el->[0] eq $expected; } else { ref $el eq 'ARRAY'; } } { my $sep = ' '; sub to_string { if (@_ > 0) { # Handle punctunation my ($in_brace, @punct) = ''; my @new = map { if (/^([\[\(])$/) { ($in_brace = $1) =~ tr/([/)]/; $_, ns } elsif (/^([\)\]])$/ && $in_brace eq $1) { $in_brace = ''; ns, $_ } elsif ($_ =~ /^[,\.;:\?\!\)\]]$/) { push @punct, ns, $_; (); } elsif (_is_control($_, 'pp')) { $_->[1] } elsif (_is_control($_)) { $_ } else { splice (@punct), $_; } } @_; push @new, @punct; # Produce string out of an array dealing with the special control characters # space('off') must but one character delayed my ($no_space, $space_off) = 1; my $res = ''; while (defined(my $el = shift @new)) { if (_is_control($el, 'hardspace')) { $no_space = 1; $res .= ' ' } elsif (_is_control($el, 'nospace')) { $no_space = 1; } elsif (_is_control($el, 'spaceoff')) { $space_off = 1; } elsif (_is_control($el, 'spaceon')) { space('on'); } elsif (_is_control($el, 'spacetoggle')) { space() eq 'on' ? $space_off = 1 : space('on') } else { if ($no_space) { $no_space = 0; $res .= "$el" } else { $res .= "$sep$el" } if ($space_off) { space('off'); $space_off = 0; } } } $res } else { ''; } } sub space { my ($arg) = @_; if (defined $arg && $arg =~ /^(on|off)$/) { $sep = ' ' if $arg eq 'on'; $sep = '' if $arg eq 'off'; return; } else { return $sep eq '' ? 'off' : 'on'; } } } sub _unquote { my @args = @_; $_ =~ s/^"([^"]+)"$/$1/g for @args; wantarray ? @args : $args[0]; } sub call_macro { my ($macro, @args) = @_; my @ret; my @newargs; my $i = 0; @args = _unquote(@args) if (!$macros{ $macro }{raw}); # Call any callable macros in the argument list for (@args) { if ($_ =~ /^[A-Z][a-z]+$/ && exists $macros{ $_ }) { push @ret, call_macro($_, @args[$i+1 .. $#args]); last; } else { if ($macros{ $macro }{greedy}) { push @ret, $_; } else { push @newargs, $_; } } $i++; } if ($macros{ $macro }{concat_until}) { my ($n_macro, @n_args) = (''); while (1) { die "EOF was reached and no $macros{ $macro }{concat_until} found" if not defined $n_macro; ($n_macro, @n_args) = parse_line(undef, sub { push @ret, shift }); if ($n_macro eq $macros{ $macro }{concat_until}) { push @ret, call_macro($n_macro, @n_args); last; } else { $n_macro =~ s/^\.//; push @ret, call_macro($n_macro, @n_args) if exists $macros{ $n_macro }; } } } if ($macros{ $macro }{greedy}) { #print "MACROG $macro (", (join ', ', @ret), ")\n"; return $macros{ $macro }{run}->(@ret); } else { #print "MACRO $macro (", (join ', ', @newargs), ")".(join ', ', @ret)."\n"; return $macros{ $macro }{run}->(@newargs), @ret; } } { my ($in_fh, $out_sub, $preprocess_sub); sub parse_line { $in_fh = $_[0] if defined $_[0] || !defined $in_fh; $out_sub = $_[1] if defined $_[1] || !defined $out_sub; $preprocess_sub = $_[2] if defined $_[2] || !defined $preprocess_sub; croak 'out_sub not a CODE reference' if not ref $out_sub eq 'CODE'; croak 'preprocess_sub not a CODE reference' if defined $preprocess_sub && not ref $preprocess_sub eq 'CODE'; while (my $line = <$in_fh>) { chomp $line; if ($line =~ /^\.[A-z][a-z0-9]+/ || $line =~ /^\.%[A-Z]/ || $line =~ /^\.\\"/) { $line =~ s/ +/ /g; my ($macro, @args) = quotewords(' ', 1, $line); @args = grep { defined $_ } @args; $preprocess_sub->(@args) if defined $preprocess_sub; if ($macro && exists $macros{ $macro }) { return ($macro, @args); } else { $out_sub->($line); } } else { $out_sub->($line); } } return; } } 1; __END__