1=begin comment 2 3## Mdoc.pm -- Perl functions for mdoc processing 4## 5## Author: Oliver Kindernay (GSoC project for NTP.org) 6## 7## 8## This file is part of AutoOpts, a companion to AutoGen. 9## AutoOpts is free software. 10## AutoOpts is Copyright (C) 1992-2015 by Bruce Korb - all rights reserved 11## 12## AutoOpts is available under any one of two licenses. The license 13## in use must be one of these two and the choice is under the control 14## of the user of the license. 15## 16## The GNU Lesser General Public License, version 3 or later 17## See the files "COPYING.lgplv3" and "COPYING.gplv3" 18## 19## The Modified Berkeley Software Distribution License 20## See the file "COPYING.mbsd" 21## 22## These files have the following sha256 sums: 23## 24## 8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95 COPYING.gplv3 25## 4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b COPYING.lgplv3 26## 13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239 COPYING.mbsd 27=end comment 28=head1 NAME 29 30Mdoc - perl module to parse Mdoc macros 31 32=head1 SYNOPSIS 33 34 use Mdoc qw(ns pp soff son stoggle mapwords); 35 36See mdoc2man and mdoc2texi for code examples. 37 38=head1 FUNCTIONS 39 40=over 4 41 42=item def_macro( NAME, CODE, [ raw => 1, greedy => 1, concat_until => '.Xx' ] ) 43 44Define new macro. The CODE reference will be called by call_macro(). You can 45have two distinct definitions for and inline macro and for a standalone macro 46(i. e. 'Pa' and '.Pa'). 47 48The CODE reference is passed a list of arguments and is expected to return list 49of strings and control characters (see C<CONSTANTS>). 50 51By default the surrouding "" from arguments to macros are removed, use C<raw> 52to disable this. 53 54Normaly CODE reference is passed all arguments up to next nested macro. Set 55C<greedy> to to pass everything up to the end of the line. 56 57If the concat_until is present, the line is concated until the .Xx macro is 58found. For example the following macro definition 59 60 def_macro('.Oo', gen_encloser(qw([ ]), concat_until => '.Oc' } 61 def_macro('.Cm', sub { mapwords {'($_)'} @_ } } 62 63and the following input 64 65 .Oo 66 .Cm foo | 67 .Cm bar | 68 .Oc 69 70results in [(foo) | (bar)] 71 72=item get_macro( NAME ) 73 74Returns a hash reference like: 75 76 { run => CODE, raw => [1|0], greedy => [1|0] } 77 78Where C<CODE> is the CODE reference used to define macro called C<NAME> 79 80=item parse_line( INPUT, OUTPUT_CODE, PREPROCESS_CODE ) 81 82Parse a line from the C<INPUT> filehandle. If a macro was detected it returns a 83list (MACRO_NAME, @MACRO_ARGS), otherwise it calls the C<OUTPUT_CODE>, giving 84caller a chance to modify line before printing it. If C<PREPROCESS_CODE> is 85defined it calls it prior to passing argument to a macro, giving caller a 86chance to alter them. if EOF was reached undef is returned. 87 88=item call_macro( MACRO, ARGS, ... ) 89 90Call macro C<MACRO> with C<ARGS>. The CODE reference for macro C<MACRO> is 91called and for all the nested macros. Every called macro returns a list which 92is appended to return value and returned when all nested macros are processed. 93Use to_string() to produce a printable string from the list. 94 95=item to_string ( LIST ) 96 97Processes C<LIST> returned from call_macro() and returns formatted string. 98 99=item mapwords BLOCK ARRAY 100 101This is like perl's map only it calls BLOCK only on elements which are not 102punctuation or control characters. 103 104=item space ( ['on'|'off] ) 105 106Turn spacing on or off. If called without argument it returns the current state. 107 108=item gen_encloser ( START, END ) 109 110Helper function for generating macros that enclose their arguments. 111 gen_encloser(qw({ })); 112returns 113 sub { '{', ns, @_, ns, pp('}')} 114 115=item set_Bl_callback( CODE , DEFS ) 116 117This module implements the Bl/El macros for you. Using set_Bl_callback you can 118provide a macro definition that should be executed on a .Bl call. 119 120=item set_El_callback( CODE , DEFS ) 121 122This module implements the Bl/El macros for you. Using set_El_callback you can 123provide a macro definition that should be executed on a .El call. 124 125=item set_Re_callback( CODE ) 126 127The C<CODE> is called after a Rs/Re block is done. With a hash reference as a 128parameter, describing the reference. 129 130=back 131 132=head1 CONSTANTS 133 134=over 4 135 136=item ns 137 138Indicate 'no space' between to members of the list. 139 140=item pp ( STRING ) 141 142The string is 'punctuation point'. It means that every punctuation 143preceeding that element is put behind it. 144 145=item soff 146 147Turn spacing off. 148 149=item son 150 151Turn spacing on. 152 153=item stoggle 154 155Toogle spacing. 156 157=item hs 158 159Print space no matter spacing mode. 160 161=back 162 163=head1 TODO 164 165* The concat_until only works with standalone macros. This means that 166 .Po blah Pc 167will hang until .Pc in encountered. 168 169* Provide default macros for Bd/Ed 170 171* The reference implementation is uncomplete 172 173=cut 174 175package Mdoc; 176use strict; 177use warnings; 178use List::Util qw(reduce); 179use Text::ParseWords qw(quotewords); 180use Carp; 181use Exporter qw(import); 182our @EXPORT_OK = qw(ns pp soff son stoggle hs mapwords gen_encloser nl); 183 184use constant { 185 ns => ['nospace'], 186 soff => ['spaceoff'], 187 son => ['spaceon'], 188 stoggle => ['spacetoggle'], 189 hs => ['hardspace'], 190}; 191 192sub pp { 193 my $c = shift; 194 return ['pp', $c ]; 195} 196sub gen_encloser { 197 my ($o, $c) = @_; 198 return sub { ($o, ns, @_, ns, pp($c)) }; 199} 200 201sub mapwords(&@) { 202 my ($f, @l) = @_; 203 my @res; 204 for my $el (@l) { 205 local $_ = $el; 206 push @res, $el =~ /^(?:[,\.\{\}\(\):;\[\]\|])$/ || ref $el eq 'ARRAY' ? 207 $el : $f->(); 208 } 209 return @res; 210} 211 212my %macros; 213 214############################################################################### 215 216# Default macro definitions start 217 218############################################################################### 219 220def_macro('Xo', sub { @_ }, concat_until => '.Xc'); 221 222def_macro('.Ns', sub {ns, @_}); 223def_macro('Ns', sub {ns, @_}); 224 225{ 226 my %reference; 227 def_macro('.Rs', sub { () } ); 228 def_macro('.%A', sub { 229 if ($reference{authors}) { 230 $reference{authors} .= " and @_" 231 } 232 else { 233 $reference{authors} = "@_"; 234 } 235 return (); 236 }); 237 def_macro('.%T', sub { $reference{title} = "@_"; () } ); 238 def_macro('.%O', sub { $reference{optional} = "@_"; () } ); 239 240 sub set_Re_callback { 241 my ($sub) = @_; 242 croak 'Not a CODE reference' if not ref $sub eq 'CODE'; 243 def_macro('.Re', sub { 244 my @ret = $sub->(\%reference); 245 %reference = (); @ret 246 }); 247 return; 248 } 249} 250 251def_macro('.Bl', sub { die '.Bl - no list callback set' }); 252def_macro('.It', sub { die ".It called outside of list context - maybe near line $." }); 253def_macro('.El', sub { die '.El requires .Bl first' }); 254 255 256{ 257 my $elcb = sub { () }; 258 259 sub set_El_callback { 260 my ($sub) = @_; 261 croak 'Not a CODE reference' if ref $sub ne 'CODE'; 262 $elcb = $sub; 263 return; 264 } 265 266 sub set_Bl_callback { 267 my ($blcb, %defs) = @_; 268 croak 'Not a CODE reference' if ref $blcb ne 'CODE'; 269 def_macro('.Bl', sub { 270 271 my $orig_it = get_macro('.It'); 272 my $orig_el = get_macro('.El'); 273 my $orig_bl = get_macro('.Bl'); 274 my $orig_elcb = $elcb; 275 276 # Restore previous .It and .El on each .El 277 def_macro('.El', sub { 278 def_macro('.El', delete $orig_el->{run}, %$orig_el); 279 def_macro('.It', delete $orig_it->{run}, %$orig_it); 280 def_macro('.Bl', delete $orig_bl->{run}, %$orig_bl); 281 my @ret = $elcb->(@_); 282 $elcb = $orig_elcb; 283 @ret 284 }); 285 $blcb->(@_) 286 }, %defs); 287 return; 288 } 289} 290 291def_macro('.Sm', sub { 292 my ($arg) = @_; 293 if (defined $arg) { 294 space($arg); 295 } else { 296 space() eq 'off' ? 297 space('on') : 298 space('off'); 299 } 300 () 301} ); 302def_macro('Sm', do { my $off; sub { 303 my ($arg) = @_; 304 if (defined $arg && $arg =~ /^(on|off)$/) { 305 shift; 306 if ($arg eq 'off') { soff, @_; } 307 elsif ($arg eq 'on') { son, @_; } 308 } 309 else { 310 stoggle, @_; 311 } 312}} ); 313 314############################################################################### 315 316# Default macro definitions end 317 318############################################################################### 319 320sub def_macro { 321 croak 'Odd number of elements for hash argument <'.(scalar @_).'>' if @_%2; 322 my ($macro, $sub, %def) = @_; 323 croak 'Not a CODE reference' if ref $sub ne 'CODE'; 324 325 $macros{ $macro } = { 326 run => $sub, 327 greedy => delete $def{greedy} || 0, 328 raw => delete $def{raw} || 0, 329 concat_until => delete $def{concat_until}, 330 }; 331 if ($macros{ $macro }{concat_until}) { 332 $macros{ $macros{ $macro }{concat_until} } = { run => sub { @_ } }; 333 $macros{ $macro }{greedy} = 1; 334 } 335 return; 336} 337 338sub get_macro { 339 my ($macro) = @_; 340 croak "Macro <$macro> not defined" if not exists $macros{ $macro }; 341 +{ %{ $macros{ $macro } } } 342} 343 344#TODO: document this 345sub parse_opts { 346 my %args; 347 my $last; 348 for (@_) { 349 if ($_ =~ /^\\?-/) { 350 s/^\\?-//; 351 $args{$_} = 1; 352 $last = _unquote($_); 353 } 354 else { 355 $args{$last} = _unquote($_) if $last; 356 undef $last; 357 } 358 } 359 return %args; 360} 361 362sub _is_control { 363 my ($el, $expected) = @_; 364 if (defined $expected) { 365 ref $el eq 'ARRAY' and $el->[0] eq $expected; 366 } 367 else { 368 ref $el eq 'ARRAY'; 369 } 370} 371 372{ 373 my $sep = ' '; 374 375 sub to_string { 376 if (@_ > 0) { 377 # Handle punctunation 378 my ($in_brace, @punct) = ''; 379 my @new = map { 380 if (/^([\[\(])$/) { 381 ($in_brace = $1) =~ tr/([/)]/; 382 $_, ns 383 } 384 elsif (/^([\)\]])$/ && $in_brace eq $1) { 385 $in_brace = ''; 386 ns, $_ 387 } 388 elsif ($_ =~ /^[,\.;:\?\!\)\]]$/) { 389 push @punct, ns, $_; 390 (); 391 } 392 elsif (_is_control($_, 'pp')) { 393 $_->[1] 394 } 395 elsif (_is_control($_)) { 396 $_ 397 } 398 else { 399 splice (@punct), $_; 400 } 401 } @_; 402 push @new, @punct; 403 404 # Produce string out of an array dealing with the special control characters 405 # space('off') must but one character delayed 406 my ($no_space, $space_off) = 1; 407 my $res = ''; 408 while (defined(my $el = shift @new)) { 409 if (_is_control($el, 'hardspace')) { $no_space = 1; $res .= ' ' } 410 elsif (_is_control($el, 'nospace')) { $no_space = 1; } 411 elsif (_is_control($el, 'spaceoff')) { $space_off = 1; } 412 elsif (_is_control($el, 'spaceon')) { space('on'); } 413 elsif (_is_control($el, 'spacetoggle')) { space() eq 'on' ? 414 $space_off = 1 : 415 space('on') } 416 else { 417 if ($no_space) { 418 $no_space = 0; 419 $res .= "$el" 420 } 421 else { 422 $res .= "$sep$el" 423 } 424 425 if ($space_off) { space('off'); $space_off = 0; } 426 } 427 } 428 $res 429 } 430 else { 431 ''; 432 } 433 } 434 435 sub space { 436 my ($arg) = @_; 437 if (defined $arg && $arg =~ /^(on|off)$/) { 438 $sep = ' ' if $arg eq 'on'; 439 $sep = '' if $arg eq 'off'; 440 return; 441 } 442 else { 443 return $sep eq '' ? 'off' : 'on'; 444 } 445 } 446} 447 448sub _unquote { 449 my @args = @_; 450 $_ =~ s/^"([^"]+)"$/$1/g for @args; 451 wantarray ? @args : $args[0]; 452} 453 454sub call_macro { 455 my ($macro, @args) = @_; 456 my @ret; 457 458 my @newargs; 459 my $i = 0; 460 461 @args = _unquote(@args) if (!$macros{ $macro }{raw}); 462 463 # Call any callable macros in the argument list 464 for (@args) { 465 if ($_ =~ /^[A-Z][a-z]+$/ && exists $macros{ $_ }) { 466 push @ret, call_macro($_, @args[$i+1 .. $#args]); 467 last; 468 } else { 469 if ($macros{ $macro }{greedy}) { 470 push @ret, $_; 471 } 472 else { 473 push @newargs, $_; 474 } 475 } 476 $i++; 477 } 478 479 if ($macros{ $macro }{concat_until}) { 480 my ($n_macro, @n_args) = (''); 481 while (1) { 482 die "EOF was reached and no $macros{ $macro }{concat_until} found" 483 if not defined $n_macro; 484 ($n_macro, @n_args) = parse_line(undef, sub { push @ret, shift }); 485 if ($n_macro eq $macros{ $macro }{concat_until}) { 486 push @ret, call_macro($n_macro, @n_args); 487 last; 488 } 489 else { 490 $n_macro =~ s/^\.//; 491 push @ret, call_macro($n_macro, @n_args) if exists $macros{ $n_macro }; 492 } 493 } 494 } 495 496 if ($macros{ $macro }{greedy}) { 497 #print "MACROG $macro (", (join ', ', @ret), ")\n"; 498 return $macros{ $macro }{run}->(@ret); 499 } 500 else { 501 #print "MACRO $macro (", (join ', ', @newargs), ")".(join ', ', @ret)."\n"; 502 return $macros{ $macro }{run}->(@newargs), @ret; 503 } 504} 505 506{ 507 my ($in_fh, $out_sub, $preprocess_sub); 508 sub parse_line { 509 $in_fh = $_[0] if defined $_[0] || !defined $in_fh; 510 $out_sub = $_[1] if defined $_[1] || !defined $out_sub; 511 $preprocess_sub = $_[2] if defined $_[2] || !defined $preprocess_sub; 512 513 croak 'out_sub not a CODE reference' 514 if not ref $out_sub eq 'CODE'; 515 croak 'preprocess_sub not a CODE reference' 516 if defined $preprocess_sub && not ref $preprocess_sub eq 'CODE'; 517 518 while (my $line = <$in_fh>) { 519 chomp $line; 520 if ($line =~ /^\.[A-z][a-z0-9]+/ || $line =~ /^\.%[A-Z]/ || 521 $line =~ /^\.\\"/) 522 { 523 $line =~ s/ +/ /g; 524 my ($macro, @args) = quotewords(' ', 1, $line); 525 @args = grep { defined $_ } @args; 526 $preprocess_sub->(@args) if defined $preprocess_sub; 527 if ($macro && exists $macros{ $macro }) { 528 return ($macro, @args); 529 } else { 530 $out_sub->($line); 531 } 532 } 533 else { 534 $out_sub->($line); 535 } 536 } 537 return; 538 } 539} 540 5411; 542__END__ 543