1=head1 NAME 2 3Mdoc - perl module to parse Mdoc macros 4 5=head1 SYNOPSIS 6 7 use Mdoc qw(ns pp soff son stoggle mapwords); 8 9See mdoc2man and mdoc2texi for code examples. 10 11=head1 FUNCTIONS 12 13=over 4 14 15=item def_macro( NAME, CODE, [ raw => 1, greedy => 1, concat_until => '.Xx' ] ) 16 17Define new macro. The CODE reference will be called by call_macro(). You can 18have two distinct definitions for and inline macro and for a standalone macro 19(i. e. 'Pa' and '.Pa'). 20 21The CODE reference is passed a list of arguments and is expected to return list 22of strings and control characters (see C<CONSTANTS>). 23 24By default the surrouding "" from arguments to macros are removed, use C<raw> 25to disable this. 26 27Normaly CODE reference is passed all arguments up to next nested macro. Set 28C<greedy> to to pass everything up to the end of the line. 29 30If the concat_until is present, the line is concated until the .Xx macro is 31found. For example the following macro definition 32 33 def_macro('.Oo', gen_encloser(qw([ ]), concat_until => '.Oc' } 34 def_macro('.Cm', sub { mapwords {'($_)'} @_ } } 35 36and the following input 37 38 .Oo 39 .Cm foo | 40 .Cm bar | 41 .Oc 42 43results in [(foo) | (bar)] 44 45=item get_macro( NAME ) 46 47Returns a hash reference like: 48 49 { run => CODE, raw => [1|0], greedy => [1|0] } 50 51Where C<CODE> is the CODE reference used to define macro called C<NAME> 52 53=item parse_line( INPUT, OUTPUT_CODE, PREPROCESS_CODE ) 54 55Parse a line from the C<INPUT> filehandle. If a macro was detected it returns a 56list (MACRO_NAME, @MACRO_ARGS), otherwise it calls the C<OUTPUT_CODE>, giving 57caller a chance to modify line before printing it. If C<PREPROCESS_CODE> is 58defined it calls it prior to passing argument to a macro, giving caller a 59chance to alter them. if EOF was reached undef is returned. 60 61=item call_macro( MACRO, ARGS, ... ) 62 63Call macro C<MACRO> with C<ARGS>. The CODE reference for macro C<MACRO> is 64called and for all the nested macros. Every called macro returns a list which 65is appended to return value and returned when all nested macros are processed. 66Use to_string() to produce a printable string from the list. 67 68=item to_string ( LIST ) 69 70Processes C<LIST> returned from call_macro() and returns formatted string. 71 72=item mapwords BLOCK ARRAY 73 74This is like perl's map only it calls BLOCK only on elements which are not 75punctuation or control characters. 76 77=item space ( ['on'|'off] ) 78 79Turn spacing on or off. If called without argument it returns the current state. 80 81=item gen_encloser ( START, END ) 82 83Helper function for generating macros that enclose their arguments. 84 gen_encloser(qw({ })); 85returns 86 sub { '{', ns, @_, ns, pp('}')} 87 88=item set_Bl_callback( CODE , DEFS ) 89 90This module implements the Bl/El macros for you. Using set_Bl_callback you can 91provide a macro definition that should be executed on a .Bl call. 92 93=item set_El_callback( CODE , DEFS ) 94 95This module implements the Bl/El macros for you. Using set_El_callback you can 96provide a macro definition that should be executed on a .El call. 97 98=item set_Re_callback( CODE ) 99 100The C<CODE> is called after a Rs/Re block is done. With a hash reference as a 101parameter, describing the reference. 102 103=back 104 105=head1 CONSTANTS 106 107=over 4 108 109=item ns 110 111Indicate 'no space' between to members of the list. 112 113=item pp ( STRING ) 114 115The string is 'punctuation point'. It means that every punctuation 116preceeding that element is put behind it. 117 118=item soff 119 120Turn spacing off. 121 122=item son 123 124Turn spacing on. 125 126=item stoggle 127 128Toogle spacing. 129 130=item hs 131 132Print space no matter spacing mode. 133 134=back 135 136=head1 TODO 137 138* The concat_until only works with standalone macros. This means that 139 .Po blah Pc 140will hang until .Pc in encountered. 141 142* Provide default macros for Bd/Ed 143 144* The reference implementation is uncomplete 145 146=cut 147 148package Mdoc; 149use strict; 150use warnings; 151use List::Util qw(reduce); 152use Text::ParseWords qw(quotewords); 153use Carp; 154use Exporter qw(import); 155our @EXPORT_OK = qw(ns pp soff son stoggle hs mapwords gen_encloser nl); 156 157use constant { 158 ns => ['nospace'], 159 soff => ['spaceoff'], 160 son => ['spaceon'], 161 stoggle => ['spacetoggle'], 162 hs => ['hardspace'], 163}; 164 165sub pp { 166 my $c = shift; 167 return ['pp', $c ]; 168} 169sub gen_encloser { 170 my ($o, $c) = @_; 171 return sub { ($o, ns, @_, ns, pp($c)) }; 172} 173 174sub mapwords(&@) { 175 my ($f, @l) = @_; 176 my @res; 177 for my $el (@l) { 178 local $_ = $el; 179 push @res, $el =~ /^(?:[,\.\{\}\(\):;\[\]\|])$/ || ref $el eq 'ARRAY' ? 180 $el : $f->(); 181 } 182 return @res; 183} 184 185my %macros; 186 187############################################################################### 188 189# Default macro definitions start 190 191############################################################################### 192 193def_macro('Xo', sub { @_ }, concat_until => '.Xc'); 194 195def_macro('.Ns', sub {ns, @_}); 196def_macro('Ns', sub {ns, @_}); 197 198{ 199 my %reference; 200 def_macro('.Rs', sub { () } ); 201 def_macro('.%A', sub { 202 if ($reference{authors}) { 203 $reference{authors} .= " and @_" 204 } 205 else { 206 $reference{authors} = "@_"; 207 } 208 return (); 209 }); 210 def_macro('.%T', sub { $reference{title} = "@_"; () } ); 211 def_macro('.%O', sub { $reference{optional} = "@_"; () } ); 212 213 sub set_Re_callback { 214 my ($sub) = @_; 215 croak 'Not a CODE reference' if not ref $sub eq 'CODE'; 216 def_macro('.Re', sub { 217 my @ret = $sub->(\%reference); 218 %reference = (); @ret 219 }); 220 return; 221 } 222} 223 224def_macro('.Bl', sub { die '.Bl - no list callback set' }); 225def_macro('.It', sub { die ".It called outside of list context - maybe near line $." }); 226def_macro('.El', sub { die '.El requires .Bl first' }); 227 228 229{ 230 my $elcb = sub { () }; 231 232 sub set_El_callback { 233 my ($sub) = @_; 234 croak 'Not a CODE reference' if ref $sub ne 'CODE'; 235 $elcb = $sub; 236 return; 237 } 238 239 sub set_Bl_callback { 240 my ($blcb, %defs) = @_; 241 croak 'Not a CODE reference' if ref $blcb ne 'CODE'; 242 def_macro('.Bl', sub { 243 244 my $orig_it = get_macro('.It'); 245 my $orig_el = get_macro('.El'); 246 my $orig_bl = get_macro('.Bl'); 247 my $orig_elcb = $elcb; 248 249 # Restore previous .It and .El on each .El 250 def_macro('.El', sub { 251 def_macro('.El', delete $orig_el->{run}, %$orig_el); 252 def_macro('.It', delete $orig_it->{run}, %$orig_it); 253 def_macro('.Bl', delete $orig_bl->{run}, %$orig_bl); 254 my @ret = $elcb->(@_); 255 $elcb = $orig_elcb; 256 @ret 257 }); 258 $blcb->(@_) 259 }, %defs); 260 return; 261 } 262} 263 264def_macro('.Sm', sub { 265 my ($arg) = @_; 266 if (defined $arg) { 267 space($arg); 268 } else { 269 space() eq 'off' ? 270 space('on') : 271 space('off'); 272 } 273 () 274} ); 275def_macro('Sm', do { my $off; sub { 276 my ($arg) = @_; 277 if (defined $arg && $arg =~ /^(on|off)$/) { 278 shift; 279 if ($arg eq 'off') { soff, @_; } 280 elsif ($arg eq 'on') { son, @_; } 281 } 282 else { 283 stoggle, @_; 284 } 285}} ); 286 287############################################################################### 288 289# Default macro definitions end 290 291############################################################################### 292 293sub def_macro { 294 croak 'Odd number of elements for hash argument <'.(scalar @_).'>' if @_%2; 295 my ($macro, $sub, %def) = @_; 296 croak 'Not a CODE reference' if ref $sub ne 'CODE'; 297 298 $macros{ $macro } = { 299 run => $sub, 300 greedy => delete $def{greedy} || 0, 301 raw => delete $def{raw} || 0, 302 concat_until => delete $def{concat_until}, 303 }; 304 if ($macros{ $macro }{concat_until}) { 305 $macros{ $macros{ $macro }{concat_until} } = { run => sub { @_ } }; 306 $macros{ $macro }{greedy} = 1; 307 } 308 return; 309} 310 311sub get_macro { 312 my ($macro) = @_; 313 croak "Macro <$macro> not defined" if not exists $macros{ $macro }; 314 +{ %{ $macros{ $macro } } } 315} 316 317#TODO: document this 318sub parse_opts { 319 my %args; 320 my $last; 321 for (@_) { 322 if ($_ =~ /^\\?-/) { 323 s/^\\?-//; 324 $args{$_} = 1; 325 $last = _unquote($_); 326 } 327 else { 328 $args{$last} = _unquote($_) if $last; 329 undef $last; 330 } 331 } 332 return %args; 333} 334 335sub _is_control { 336 my ($el, $expected) = @_; 337 if (defined $expected) { 338 ref $el eq 'ARRAY' and $el->[0] eq $expected; 339 } 340 else { 341 ref $el eq 'ARRAY'; 342 } 343} 344 345{ 346 my $sep = ' '; 347 348 sub to_string { 349 if (@_ > 0) { 350 # Handle punctunation 351 my ($in_brace, @punct) = ''; 352 my @new = map { 353 if (/^([\[\(])$/) { 354 ($in_brace = $1) =~ tr/([/)]/; 355 $_, ns 356 } 357 elsif (/^([\)\]])$/ && $in_brace eq $1) { 358 $in_brace = ''; 359 ns, $_ 360 } 361 elsif ($_ =~ /^[,\.;:\?\!\)\]]$/) { 362 push @punct, ns, $_; 363 (); 364 } 365 elsif (_is_control($_, 'pp')) { 366 $_->[1] 367 } 368 elsif (_is_control($_)) { 369 $_ 370 } 371 else { 372 splice (@punct), $_; 373 } 374 } @_; 375 push @new, @punct; 376 377 # Produce string out of an array dealing with the special control characters 378 # space('off') must but one character delayed 379 my ($no_space, $space_off) = 1; 380 my $res = ''; 381 while (defined(my $el = shift @new)) { 382 if (_is_control($el, 'hardspace')) { $no_space = 1; $res .= ' ' } 383 elsif (_is_control($el, 'nospace')) { $no_space = 1; } 384 elsif (_is_control($el, 'spaceoff')) { $space_off = 1; } 385 elsif (_is_control($el, 'spaceon')) { space('on'); } 386 elsif (_is_control($el, 'spacetoggle')) { space() eq 'on' ? 387 $space_off = 1 : 388 space('on') } 389 else { 390 if ($no_space) { 391 $no_space = 0; 392 $res .= "$el" 393 } 394 else { 395 $res .= "$sep$el" 396 } 397 398 if ($space_off) { space('off'); $space_off = 0; } 399 } 400 } 401 $res 402 } 403 else { 404 ''; 405 } 406 } 407 408 sub space { 409 my ($arg) = @_; 410 if (defined $arg && $arg =~ /^(on|off)$/) { 411 $sep = ' ' if $arg eq 'on'; 412 $sep = '' if $arg eq 'off'; 413 return; 414 } 415 else { 416 return $sep eq '' ? 'off' : 'on'; 417 } 418 } 419} 420 421sub _unquote { 422 my @args = @_; 423 $_ =~ s/^"([^"]+)"$/$1/g for @args; 424 wantarray ? @args : $args[0]; 425} 426 427sub call_macro { 428 my ($macro, @args) = @_; 429 my @ret; 430 431 my @newargs; 432 my $i = 0; 433 434 @args = _unquote(@args) if (!$macros{ $macro }{raw}); 435 436 # Call any callable macros in the argument list 437 for (@args) { 438 if ($_ =~ /^[A-Z][a-z]+$/ && exists $macros{ $_ }) { 439 push @ret, call_macro($_, @args[$i+1 .. $#args]); 440 last; 441 } else { 442 if ($macros{ $macro }{greedy}) { 443 push @ret, $_; 444 } 445 else { 446 push @newargs, $_; 447 } 448 } 449 $i++; 450 } 451 452 if ($macros{ $macro }{concat_until}) { 453 my ($n_macro, @n_args) = (''); 454 while (1) { 455 die "EOF was reached and no $macros{ $macro }{concat_until} found" 456 if not defined $n_macro; 457 ($n_macro, @n_args) = parse_line(undef, sub { push @ret, shift }); 458 if ($n_macro eq $macros{ $macro }{concat_until}) { 459 push @ret, call_macro($n_macro, @n_args); 460 last; 461 } 462 else { 463 $n_macro =~ s/^\.//; 464 push @ret, call_macro($n_macro, @n_args) if exists $macros{ $n_macro }; 465 } 466 } 467 } 468 469 if ($macros{ $macro }{greedy}) { 470 #print "MACROG $macro (", (join ', ', @ret), ")\n"; 471 return $macros{ $macro }{run}->(@ret); 472 } 473 else { 474 #print "MACRO $macro (", (join ', ', @newargs), ")".(join ', ', @ret)."\n"; 475 return $macros{ $macro }{run}->(@newargs), @ret; 476 } 477} 478 479{ 480 my ($in_fh, $out_sub, $preprocess_sub); 481 sub parse_line { 482 $in_fh = $_[0] if defined $_[0] || !defined $in_fh; 483 $out_sub = $_[1] if defined $_[1] || !defined $out_sub; 484 $preprocess_sub = $_[2] if defined $_[2] || !defined $preprocess_sub; 485 486 croak 'out_sub not a CODE reference' 487 if not ref $out_sub eq 'CODE'; 488 croak 'preprocess_sub not a CODE reference' 489 if defined $preprocess_sub && not ref $preprocess_sub eq 'CODE'; 490 491 while (my $line = <$in_fh>) { 492 chomp $line; 493 if ($line =~ /^\.[A-z][a-z0-9]+/ || $line =~ /^\.%[A-Z]/ || 494 $line =~ /^\.\\"/) 495 { 496 $line =~ s/ +/ /g; 497 my ($macro, @args) = quotewords(' ', 1, $line); 498 @args = grep { defined $_ } @args; 499 $preprocess_sub->(@args) if defined $preprocess_sub; 500 if ($macro && exists $macros{ $macro }) { 501 return ($macro, @args); 502 } else { 503 $out_sub->($line); 504 } 505 } 506 else { 507 $out_sub->($line); 508 } 509 } 510 return; 511 } 512} 513 5141; 515__END__ 516