xref: /freebsd/contrib/mandoc/mandoc.3 (revision cc426dd31990b8b50b210efc450e404596548ca1)
1.\"	$Id: mandoc.3,v 1.41 2017/07/04 23:40:01 schwarze Exp $
2.\"
3.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4.\" Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
5.\"
6.\" Permission to use, copy, modify, and distribute this software for any
7.\" purpose with or without fee is hereby granted, provided that the above
8.\" copyright notice and this permission notice appear in all copies.
9.\"
10.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17.\"
18.Dd $Mdocdate: July 4 2017 $
19.Dt MANDOC 3
20.Os
21.Sh NAME
22.Nm mandoc ,
23.Nm deroff ,
24.Nm mandocmsg ,
25.Nm man_mparse ,
26.Nm man_validate ,
27.Nm mdoc_validate ,
28.Nm mparse_alloc ,
29.Nm mparse_free ,
30.Nm mparse_getkeep ,
31.Nm mparse_keep ,
32.Nm mparse_open ,
33.Nm mparse_readfd ,
34.Nm mparse_reset ,
35.Nm mparse_result ,
36.Nm mparse_strerror ,
37.Nm mparse_strlevel ,
38.Nm mparse_updaterc
39.Nd mandoc macro compiler library
40.Sh SYNOPSIS
41.In sys/types.h
42.In mandoc.h
43.Pp
44.Fd "#define ASCII_NBRSP"
45.Fd "#define ASCII_HYPH"
46.Fd "#define ASCII_BREAK"
47.Ft struct mparse *
48.Fo mparse_alloc
49.Fa "int options"
50.Fa "enum mandocerr mmin"
51.Fa "mandocmsg mmsg"
52.Fa "enum mandoc_os os_e"
53.Fa "char *os_s"
54.Fc
55.Ft void
56.Fo (*mandocmsg)
57.Fa "enum mandocerr errtype"
58.Fa "enum mandoclevel level"
59.Fa "const char *file"
60.Fa "int line"
61.Fa "int col"
62.Fa "const char *msg"
63.Fc
64.Ft void
65.Fo mparse_free
66.Fa "struct mparse *parse"
67.Fc
68.Ft const char *
69.Fo mparse_getkeep
70.Fa "const struct mparse *parse"
71.Fc
72.Ft void
73.Fo mparse_keep
74.Fa "struct mparse *parse"
75.Fc
76.Ft int
77.Fo mparse_open
78.Fa "struct mparse *parse"
79.Fa "const char *fname"
80.Fc
81.Ft "enum mandoclevel"
82.Fo mparse_readfd
83.Fa "struct mparse *parse"
84.Fa "int fd"
85.Fa "const char *fname"
86.Fc
87.Ft void
88.Fo mparse_reset
89.Fa "struct mparse *parse"
90.Fc
91.Ft void
92.Fo mparse_result
93.Fa "struct mparse *parse"
94.Fa "struct roff_man **man"
95.Fa "char **sodest"
96.Fc
97.Ft "const char *"
98.Fo mparse_strerror
99.Fa "enum mandocerr"
100.Fc
101.Ft "const char *"
102.Fo mparse_strlevel
103.Fa "enum mandoclevel"
104.Fc
105.Ft void
106.Fo mparse_updaterc
107.Fa "struct mparse *parse"
108.Fa "enum mandoclevel *rc"
109.Fc
110.In roff.h
111.Ft void
112.Fo deroff
113.Fa "char **dest"
114.Fa "const struct roff_node *node"
115.Fc
116.In sys/types.h
117.In mandoc.h
118.In mdoc.h
119.Vt extern const char * const * mdoc_argnames;
120.Vt extern const char * const * mdoc_macronames;
121.Ft void
122.Fo mdoc_validate
123.Fa "struct roff_man *mdoc"
124.Fc
125.In sys/types.h
126.In mandoc.h
127.In man.h
128.Vt extern const char * const * man_macronames;
129.Ft "const struct mparse *"
130.Fo man_mparse
131.Fa "const struct roff_man *man"
132.Fc
133.Ft void
134.Fo man_validate
135.Fa "struct roff_man *man"
136.Fc
137.Sh DESCRIPTION
138The
139.Nm mandoc
140library parses a
141.Ux
142manual into an abstract syntax tree (AST).
143.Ux
144manuals are composed of
145.Xr mdoc 7
146or
147.Xr man 7 ,
148and may be mixed with
149.Xr roff 7 ,
150.Xr tbl 7 ,
151and
152.Xr eqn 7
153invocations.
154.Pp
155The following describes a general parse sequence:
156.Bl -enum
157.It
158initiate a parsing sequence with
159.Xr mchars_alloc 3
160and
161.Fn mparse_alloc ;
162.It
163open a file with
164.Xr open 2
165or
166.Fn mparse_open ;
167.It
168parse it with
169.Fn mparse_readfd ;
170.It
171close it with
172.Xr close 2 ;
173.It
174retrieve the syntax tree with
175.Fn mparse_result ;
176.It
177depending on whether the
178.Fa macroset
179member of the returned
180.Vt struct roff_man
181is
182.Dv MACROSET_MDOC
183or
184.Dv MACROSET_MAN ,
185validate it with
186.Fn mdoc_validate
187or
188.Fn man_validate ,
189respectively;
190.It
191if information about the validity of the input is needed, fetch it with
192.Fn mparse_updaterc ;
193.It
194iterate over parse nodes, starting from the
195.Fa first
196member of the returned
197.Vt struct roff_man ;
198.It
199free all allocated memory with
200.Fn mparse_free
201and
202.Xr mchars_free 3 ,
203or invoke
204.Fn mparse_reset
205and go back to step 2 to parse new files.
206.El
207.Sh REFERENCE
208This section documents the functions, types, and variables available
209via
210.In mandoc.h ,
211with the exception of those documented in
212.Xr mandoc_escape 3
213and
214.Xr mchars_alloc 3 .
215.Ss Types
216.Bl -ohang
217.It Vt "enum mandocerr"
218An error or warning message during parsing.
219.It Vt "enum mandoclevel"
220A classification of an
221.Vt "enum mandocerr"
222as regards system operation.
223See the DIAGNOSTICS section in
224.Xr mandoc 1
225regarding the meanings of the levels.
226.It Vt "struct mparse"
227An opaque pointer to a running parse sequence.
228Created with
229.Fn mparse_alloc
230and freed with
231.Fn mparse_free .
232This may be used across parsed input if
233.Fn mparse_reset
234is called between parses.
235.It Vt "mandocmsg"
236A prototype for a function to handle error and warning
237messages emitted by the parser.
238.El
239.Ss Functions
240.Bl -ohang
241.It Fn deroff
242Obtain a text-only representation of a
243.Vt struct roff_node ,
244including text contained in its child nodes.
245To be used on children of the
246.Fa first
247member of
248.Vt struct roff_man .
249When it is no longer needed, the pointer returned from
250.Fn deroff
251can be passed to
252.Xr free 3 .
253.It Fn man_mparse
254Get the parser used for the current output.
255Declared in
256.In man.h ,
257implemented in
258.Pa man.c .
259.It Fn man_validate
260Validate the
261.Dv MACROSET_MAN
262parse tree obtained with
263.Fn mparse_result .
264Declared in
265.In man.h ,
266implemented in
267.Pa man.c .
268.It Fn mdoc_validate
269Validate the
270.Dv MACROSET_MDOC
271parse tree obtained with
272.Fn mparse_result .
273Declared in
274.In mdoc.h ,
275implemented in
276.Pa mdoc.c .
277.It Fn mparse_alloc
278Allocate a parser.
279The arguments have the following effect:
280.Bl -tag -offset 5n -width inttype
281.It Ar options
282When the
283.Dv MPARSE_MDOC
284or
285.Dv MPARSE_MAN
286bit is set, only that parser is used.
287Otherwise, the document type is automatically detected.
288.Pp
289When the
290.Dv MPARSE_SO
291bit is set,
292.Xr roff 7
293.Ic \&so
294file inclusion requests are always honoured.
295Otherwise, if the request is the only content in an input file,
296only the file name is remembered, to be returned in the
297.Fa sodest
298argument of
299.Fn mparse_result .
300.Pp
301When the
302.Dv MPARSE_QUICK
303bit is set, parsing is aborted after the NAME section.
304This is for example useful in
305.Xr makewhatis 8
306.Fl Q
307to quickly build minimal databases.
308.It Ar mmin
309Can be set to
310.Dv MANDOCERR_BASE ,
311.Dv MANDOCERR_STYLE ,
312.Dv MANDOCERR_WARNING ,
313.Dv MANDOCERR_ERROR ,
314.Dv MANDOCERR_UNSUPP ,
315or
316.Dv MANDOCERR_MAX .
317Messages below the selected level will be suppressed.
318.It Ar mmsg
319A callback function to handle errors and warnings.
320See
321.Pa main.c
322for an example.
323If printing of error messages is not desired,
324.Dv NULL
325may be passed.
326.It Ar os_e
327Operating system to check base system conventions for.
328If
329.Dv MANDOC_OS_OTHER ,
330the system is automatically detected from
331.Ic \&Os ,
332.Fl Ios ,
333or
334.Xr uname 3 .
335.It Ar os_s
336A default string for the
337.Xr mdoc 7
338.Ic \&Os
339macro, overriding the
340.Dv OSNAME
341preprocessor definition and the results of
342.Xr uname 3 .
343Passing
344.Dv NULL
345sets no default.
346.El
347.Pp
348The same parser may be used for multiple files so long as
349.Fn mparse_reset
350is called between parses.
351.Fn mparse_free
352must be called to free the memory allocated by this function.
353Declared in
354.In mandoc.h ,
355implemented in
356.Pa read.c .
357.It Fn mparse_free
358Free all memory allocated by
359.Fn mparse_alloc .
360Declared in
361.In mandoc.h ,
362implemented in
363.Pa read.c .
364.It Fn mparse_getkeep
365Acquire the keep buffer.
366Must follow a call of
367.Fn mparse_keep .
368Declared in
369.In mandoc.h ,
370implemented in
371.Pa read.c .
372.It Fn mparse_keep
373Instruct the parser to retain a copy of its parsed input.
374This can be acquired with subsequent
375.Fn mparse_getkeep
376calls.
377Declared in
378.In mandoc.h ,
379implemented in
380.Pa read.c .
381.It Fn mparse_open
382Open the file for reading.
383If that fails and
384.Fa fname
385does not already end in
386.Ql .gz ,
387try again after appending
388.Ql .gz .
389Remember whether the file is zipped or not.
390Return a file descriptor open for reading or -1 on failure.
391It can be passed to
392.Fn mparse_readfd
393or used directly.
394Declared in
395.In mandoc.h ,
396implemented in
397.Pa read.c .
398.It Fn mparse_readfd
399Parse a file descriptor opened with
400.Xr open 2
401or
402.Fn mparse_open .
403Pass the associated filename in
404.Va fname .
405This function may be called multiple times with different parameters; however,
406.Xr close 2
407and
408.Fn mparse_reset
409should be invoked between parses.
410Declared in
411.In mandoc.h ,
412implemented in
413.Pa read.c .
414.It Fn mparse_reset
415Reset a parser so that
416.Fn mparse_readfd
417may be used again.
418Declared in
419.In mandoc.h ,
420implemented in
421.Pa read.c .
422.It Fn mparse_result
423Obtain the result of a parse.
424One of the two pointers will be filled in.
425Declared in
426.In mandoc.h ,
427implemented in
428.Pa read.c .
429.It Fn mparse_strerror
430Return a statically-allocated string representation of an error code.
431Declared in
432.In mandoc.h ,
433implemented in
434.Pa read.c .
435.It Fn mparse_strlevel
436Return a statically-allocated string representation of a level code.
437Declared in
438.In mandoc.h ,
439implemented in
440.Pa read.c .
441.It Fn mparse_updaterc
442If the highest warning or error level that occurred during the current
443.Fa parse
444is higher than
445.Pf * Fa rc ,
446update
447.Pf * Fa rc
448accordingly.
449This is useful after calling
450.Fn mdoc_validate
451or
452.Fn man_validate .
453Declared in
454.In mandoc.h ,
455implemented in
456.Pa read.c .
457.El
458.Ss Variables
459.Bl -ohang
460.It Va man_macronames
461The string representation of a
462.Xr man 7
463macro as indexed by
464.Vt "enum mant" .
465.It Va mdoc_argnames
466The string representation of an
467.Xr mdoc 7
468macro argument as indexed by
469.Vt "enum mdocargt" .
470.It Va mdoc_macronames
471The string representation of an
472.Xr mdoc 7
473macro as indexed by
474.Vt "enum mdoct" .
475.El
476.Sh IMPLEMENTATION NOTES
477This section consists of structural documentation for
478.Xr mdoc 7
479and
480.Xr man 7
481syntax trees and strings.
482.Ss Man and Mdoc Strings
483Strings may be extracted from mdoc and man meta-data, or from text
484nodes (MDOC_TEXT and MAN_TEXT, respectively).
485These strings have special non-printing formatting cues embedded in the
486text itself, as well as
487.Xr roff 7
488escapes preserved from input.
489Implementing systems will need to handle both situations to produce
490human-readable text.
491In general, strings may be assumed to consist of 7-bit ASCII characters.
492.Pp
493The following non-printing characters may be embedded in text strings:
494.Bl -tag -width Ds
495.It Dv ASCII_NBRSP
496A non-breaking space character.
497.It Dv ASCII_HYPH
498A soft hyphen.
499.It Dv ASCII_BREAK
500A breakable zero-width space.
501.El
502.Pp
503Escape sequences are also passed verbatim into text strings.
504An escape sequence is a sequence of characters beginning with the
505backslash
506.Pq Sq \e .
507To construct human-readable text, these should be intercepted with
508.Xr mandoc_escape 3
509and converted with one of the functions described in
510.Xr mchars_alloc 3 .
511.Ss Man Abstract Syntax Tree
512This AST is governed by the ontological rules dictated in
513.Xr man 7
514and derives its terminology accordingly.
515.Pp
516The AST is composed of
517.Vt struct roff_node
518nodes with element, root and text types as declared by the
519.Va type
520field.
521Each node also provides its parse point (the
522.Va line ,
523.Va pos ,
524and
525.Va sec
526fields), its position in the tree (the
527.Va parent ,
528.Va child ,
529.Va next
530and
531.Va prev
532fields) and some type-specific data.
533.Pp
534The tree itself is arranged according to the following normal form,
535where capitalised non-terminals represent nodes.
536.Pp
537.Bl -tag -width "ELEMENTXX" -compact
538.It ROOT
539\(<- mnode+
540.It mnode
541\(<- ELEMENT | TEXT | BLOCK
542.It BLOCK
543\(<- HEAD BODY
544.It HEAD
545\(<- mnode*
546.It BODY
547\(<- mnode*
548.It ELEMENT
549\(<- ELEMENT | TEXT*
550.It TEXT
551\(<- [[:ascii:]]*
552.El
553.Pp
554The only elements capable of nesting other elements are those with
555next-line scope as documented in
556.Xr man 7 .
557.Ss Mdoc Abstract Syntax Tree
558This AST is governed by the ontological
559rules dictated in
560.Xr mdoc 7
561and derives its terminology accordingly.
562.Qq In-line
563elements described in
564.Xr mdoc 7
565are described simply as
566.Qq elements .
567.Pp
568The AST is composed of
569.Vt struct roff_node
570nodes with block, head, body, element, root and text types as declared
571by the
572.Va type
573field.
574Each node also provides its parse point (the
575.Va line ,
576.Va pos ,
577and
578.Va sec
579fields), its position in the tree (the
580.Va parent ,
581.Va child ,
582.Va last ,
583.Va next
584and
585.Va prev
586fields) and some type-specific data, in particular, for nodes generated
587from macros, the generating macro in the
588.Va tok
589field.
590.Pp
591The tree itself is arranged according to the following normal form,
592where capitalised non-terminals represent nodes.
593.Pp
594.Bl -tag -width "ELEMENTXX" -compact
595.It ROOT
596\(<- mnode+
597.It mnode
598\(<- BLOCK | ELEMENT | TEXT
599.It BLOCK
600\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
601.It ELEMENT
602\(<- TEXT*
603.It HEAD
604\(<- mnode*
605.It BODY
606\(<- mnode* [ENDBODY mnode*]
607.It TAIL
608\(<- mnode*
609.It TEXT
610\(<- [[:ascii:]]*
611.El
612.Pp
613Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
614the BLOCK production: these refer to punctuation marks.
615Furthermore, although a TEXT node will generally have a non-zero-length
616string, in the specific case of
617.Sq \&.Bd \-literal ,
618an empty line will produce a zero-length string.
619Multiple body parts are only found in invocations of
620.Sq \&.Bl \-column ,
621where a new body introduces a new phrase.
622.Pp
623The
624.Xr mdoc 7
625syntax tree accommodates broken block structures as well.
626The ENDBODY node is available to end the formatting associated
627with a given block before the physical end of that block.
628It has a non-null
629.Va end
630field, is of the BODY
631.Va type ,
632has the same
633.Va tok
634as the BLOCK it is ending, and has a
635.Va pending
636field pointing to that BLOCK's BODY node.
637It is an indirect child of that BODY node
638and has no children of its own.
639.Pp
640An ENDBODY node is generated when a block ends while one of its child
641blocks is still open, like in the following example:
642.Bd -literal -offset indent
643\&.Ao ao
644\&.Bo bo ac
645\&.Ac bc
646\&.Bc end
647.Ed
648.Pp
649This example results in the following block structure:
650.Bd -literal -offset indent
651BLOCK Ao
652    HEAD Ao
653    BODY Ao
654        TEXT ao
655        BLOCK Bo, pending -> Ao
656            HEAD Bo
657            BODY Bo
658                TEXT bo
659                TEXT ac
660                ENDBODY Ao, pending -> Ao
661                TEXT bc
662TEXT end
663.Ed
664.Pp
665Here, the formatting of the
666.Ic \&Ao
667block extends from TEXT ao to TEXT ac,
668while the formatting of the
669.Ic \&Bo
670block extends from TEXT bo to TEXT bc.
671It renders as follows in
672.Fl T Ns Cm ascii
673mode:
674.Pp
675.Dl <ao [bo ac> bc] end
676.Pp
677Support for badly-nested blocks is only provided for backward
678compatibility with some older
679.Xr mdoc 7
680implementations.
681Using badly-nested blocks is
682.Em strongly discouraged ;
683for example, the
684.Fl T Ns Cm html
685front-end to
686.Xr mandoc 1
687is unable to render them in any meaningful way.
688Furthermore, behaviour when encountering badly-nested blocks is not
689consistent across troff implementations, especially when using multiple
690levels of badly-nested blocks.
691.Sh SEE ALSO
692.Xr mandoc 1 ,
693.Xr man.cgi 3 ,
694.Xr mandoc_escape 3 ,
695.Xr mandoc_headers 3 ,
696.Xr mandoc_malloc 3 ,
697.Xr mansearch 3 ,
698.Xr mchars_alloc 3 ,
699.Xr tbl 3 ,
700.Xr eqn 7 ,
701.Xr man 7 ,
702.Xr mandoc_char 7 ,
703.Xr mdoc 7 ,
704.Xr roff 7 ,
705.Xr tbl 7
706.Sh AUTHORS
707.An -nosplit
708The
709.Nm
710library was written by
711.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv
712and is maintained by
713.An Ingo Schwarze Aq Mt schwarze@openbsd.org .
714